Diffstat (limited to 'gcc/config/i386')
-rw-r--r--  gcc/config/i386/avx10_2-512bf16intrin.h | 681
-rw-r--r--  gcc/config/i386/avx10_2-512convertintrin.h | 572
-rw-r--r--  gcc/config/i386/avx10_2-512mediaintrin.h | 514
-rw-r--r--  gcc/config/i386/avx10_2-512minmaxintrin.h | 489
-rw-r--r--  gcc/config/i386/avx10_2-512satcvtintrin.h | 1575
-rw-r--r--  gcc/config/i386/avx10_2bf16intrin.h | 614
-rw-r--r--  gcc/config/i386/avx10_2convertintrin.h | 530
-rw-r--r--  gcc/config/i386/avx10_2mediaintrin.h | 469
-rw-r--r--  gcc/config/i386/avx10_2minmaxintrin.h | 448
-rw-r--r--  gcc/config/i386/avx10_2satcvtintrin.h | 1779
-rw-r--r--  gcc/config/i386/avx512bf16intrin.h | 19
-rw-r--r--  gcc/config/i386/avx512bf16vlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512bitalgintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512bitalgvlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512bwintrin.h | 19
-rw-r--r--  gcc/config/i386/avx512cdintrin.h | 2
-rw-r--r--  gcc/config/i386/avx512dqintrin.h | 21
-rw-r--r--  gcc/config/i386/avx512fintrin.h | 32
-rw-r--r--  gcc/config/i386/avx512fp16intrin.h | 21
-rw-r--r--  gcc/config/i386/avx512fp16vlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512ifmaintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512ifmavlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vbmi2intrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vbmi2vlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vbmiintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vbmivlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vlbwintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vldqintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vlintrin.h | 6
-rw-r--r--  gcc/config/i386/avx512vnniintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vnnivlintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vp2intersectintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vp2intersectvlintrin.h | 5
-rw-r--r--  gcc/config/i386/avx512vpopcntdqintrin.h | 4
-rw-r--r--  gcc/config/i386/avx512vpopcntdqvlintrin.h | 5
-rw-r--r--  gcc/config/i386/cygming.h | 14
-rw-r--r--  gcc/config/i386/driver-i386.cc | 36
-rwxr-xr-x  gcc/config/i386/gcc-auto-profile | 29
-rw-r--r--  gcc/config/i386/gfniintrin.h | 8
-rw-r--r--  gcc/config/i386/host-mingw32.cc | 32
-rw-r--r--  gcc/config/i386/i386-builtin.def | 1290
-rw-r--r--  gcc/config/i386/i386-builtins.cc | 50
-rw-r--r--  gcc/config/i386/i386-c.cc | 4
-rw-r--r--  gcc/config/i386/i386-expand.cc | 353
-rw-r--r--  gcc/config/i386/i386-features.cc | 969
-rw-r--r--  gcc/config/i386/i386-features.h | 13
-rw-r--r--  gcc/config/i386/i386-isa.def | 2
-rw-r--r--  gcc/config/i386/i386-options.cc | 395
-rw-r--r--  gcc/config/i386/i386-passes.def | 1
-rw-r--r--  gcc/config/i386/i386-protos.h | 12
-rw-r--r--  gcc/config/i386/i386.cc | 1425
-rw-r--r--  gcc/config/i386/i386.h | 81
-rw-r--r--  gcc/config/i386/i386.md | 600
-rw-r--r--  gcc/config/i386/i386.opt | 45
-rw-r--r--  gcc/config/i386/i386.opt.urls | 9
-rw-r--r--  gcc/config/i386/immintrin.h | 10
-rw-r--r--  gcc/config/i386/predicates.md | 35
-rw-r--r--  gcc/config/i386/sse.md | 1383
-rw-r--r--  gcc/config/i386/vaesintrin.h | 4
-rw-r--r--  gcc/config/i386/vpclmulqdqintrin.h | 4
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 441
-rw-r--r--  gcc/config/i386/x86-tune-sched.cc | 28
-rw-r--r--  gcc/config/i386/x86-tune.def | 9
63 files changed, 8551 insertions, 6596 deletions
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h
deleted file mode 100644
index 21e4b36..0000000
--- a/gcc/config/i386/avx10_2-512bf16intrin.h
+++ /dev/null
@@ -1,681 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED
-#define _AVX10_2_512BF16INTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-/* Intrinsics vrndscalebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_roundscale_pbh(A, B) \
- (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_roundscale_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_roundscale_pbh(A, B, C) \
- (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vreducebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_reduce_pbh(A, B) \
- (__builtin_ia32_reducebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_reduce_pbh(A, B, C, D) \
- (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_reduce_pbh(A, B, C) \
- (__builtin_ia32_reducebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vgetmantbf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
-
-#define _mm512_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfpclassbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
-
-#define _mm512_fpclass_pbh_mask(X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
-#endif /* __OPTIMIZE__ */
-
-
-/* Intrinsics vcmpbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
-
-#define _mm512_cmp_pbh_mask(A, B, C) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */
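For reference, the header removed above defined the 512-bit AVX10.2 bf16 arithmetic intrinsics (_mm512_add_pbh, _mm512_mask_fmadd_pbh and friends). A minimal usage sketch, assuming a toolchain that still ships this header and enables the "avx10.2" target shown in its pragma; the helper name is hypothetical:

#include <immintrin.h>

/* Hypothetical helper: fused multiply-add on 32 bf16 lanes with merge
   masking; lanes whose mask bit is clear keep the value from a.  */
__m512bh
bf16_fma_masked (__m512bh a, __mmask32 m, __m512bh b, __m512bh c)
{
  return _mm512_mask_fmadd_pbh (a, m, b, c);
}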
diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h
deleted file mode 100644
index 611a40d..0000000
--- a/gcc/config/i386/avx10_2-512convertintrin.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead."
-#endif // _IMMINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2__
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- __R);
-}
-
-#else
-#define _mm512_cvtx_round2ps_ph(A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (-1), \
- (R)))
-#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) (W), \
- (__mmask32) (U), \
- (R)))
-#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (U), \
- (R)))
-#endif /* __OPTIMIZE__ */
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
- __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvthf8_ph (__m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_undefined_ph (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_setzero_ph (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbf8_ph (__m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
- (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
-}
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */
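For reference, the header removed above carried the 512-bit AVX10.2 convert intrinsics (packing two float vectors into __m512h, and narrowing FP16 to bf8/hf8). A small sketch using only intrinsics whose signatures appear in the diff; the helper name is hypothetical and the same toolchain assumption as above applies:

#include <immintrin.h>

/* Hypothetical helper: pack two 16-float vectors into 32 half-precision
   lanes, then narrow those lanes to 32 bf8 bytes.  */
__m256i
ps_to_bf8 (__m512 lo, __m512 hi)
{
  __m512h h = _mm512_cvtx2ps_ph (lo, hi);
  return _mm512_cvtph_bf8 (h);
}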
diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h
deleted file mode 100644
index 43271e7..0000000
--- a/gcc/config/i386/avx10_2-512mediaintrin.h
+++ /dev/null
@@ -1,514 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-
-#if !defined(__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
- (__v64qi) __Y,
- __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) __W,
- __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) _mm512_setzero_epi32 (),
- __U);
-}
-#else
-#define _mm512_mpsadbw_epu8(X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), (int)(M))
-
-#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi)(__m512i)(W), \
- (__mmask32)(U))
-
-#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi) _mm512_setzero_epi32 (), \
- (__mmask32)(U))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */
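For reference, the header removed above provided the 512-bit integer and FP16 dot-product intrinsics with masking. A minimal sketch of the masked signed-byte form; the helper name is hypothetical and the same toolchain assumption as above applies:

#include <immintrin.h>

/* Hypothetical helper: signed-by-signed byte dot product accumulated
   into 16 dword lanes; lanes whose mask bit is clear keep acc.  */
__m512i
dot_s8s8_masked (__m512i acc, __mmask16 m, __m512i a, __m512i b)
{
  return _mm512_mask_dpbssd_epi32 (acc, m, a, b);
}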
diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h
deleted file mode 100644
index a743346..0000000
--- a/gcc/config/i386/avx10_2-512minmaxintrin.h
+++ /dev/null
@@ -1,489 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-#ifdef __OPTIMIZE__
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
- const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U, __R);
-}
-
-#else
-#define _mm512_minmax_pbh(A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (-1)))
-
-#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) (W), \
- (__mmask32) (U)))
-
-#define _mm512_maskz_minmax_pbh(U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (U)))
-
-#define _mm512_minmax_round_pd(A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ph(A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ps(A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_minmax_pd(A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_pd(W, U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_pd(U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ph(A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ph(W, U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ph(U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ps(A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ps(W, U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ps(U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */
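Likewise, an illustrative (hypothetical) caller of the VMINMAXPD intrinsic deleted above. The immediate selects which min/max variant is computed; the value 0 here is an assumption made only so the call is well-formed, and the example again presumes AVX10.2 code generation is enabled.

#include <immintrin.h>

/* Per-element min/max selection over two vectors of doubles; the
   immediate picks the operation (0 is an arbitrary choice for
   illustration -- see the ISA reference for the encoding).  */
__m512d
select_minmax (__m512d a, __m512d b)
{
  return _mm512_minmax_pd (a, b, 0);
}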
diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h
deleted file mode 100644
index 215b7fd..0000000
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ /dev/null
@@ -1,1575 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epu8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epi8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epu8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epi8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epu8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epi8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epu8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epi8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epu8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-#else
-#define _mm512_ipcvts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */
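And a small sketch of how the saturating, truncating conversions deleted above are called; the function name is made up for illustration and an AVX10.2-enabled build is assumed.

#include <immintrin.h>

/* Convert 8 doubles to 32-bit signed integers with truncation and
   saturation: out-of-range inputs clamp to the integer range
   instead of producing the integer-indefinite value.  */
__m256i
doubles_to_i32_sat (__m512d v)
{
  return _mm512_cvtts_pd_epi32 (v);
}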
diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h
index e6890fc..9560480 100644
--- a/gcc/config/i386/avx10_2bf16intrin.h
+++ b/gcc/config/i386/avx10_2bf16intrin.h
@@ -34,6 +34,32 @@
#define __DISABLE_AVX10_2__
#endif /* __AVX10_2__ */
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pbh (__m256bh __A, __m256bh __B)
@@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pbh (__m256bh __A, __m256bh __B)
@@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pbh (__m256bh __A, __m256bh __B)
@@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pbh (__m256bh __A, __m256bh __B)
@@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pbh (__m256bh __A, __m256bh __B)
@@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pbh (__m256bh __A, __m256bh __B)
@@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_pbh (__m256bh __A, __m256bh __B)
@@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_pbh (__m256bh __A)
@@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pbh (__m256bh __A)
@@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_pbh (__m256bh __A)
@@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_pbh (__m256bh __A)
@@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A)
/* Intrinsics vrndscalebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_pbh (__m256bh __A, int B)
@@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_roundscale_pbh(A, B) \
(__builtin_ia32_rndscalebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vreducebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pbh (__m256bh __A, int B)
@@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_reduce_pbh(A, B) \
(__builtin_ia32_reducebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vgetmantbf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B,
@@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
}
#else
+#define _mm512_getmant_pbh(A, B, C) \
+ (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
+ (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
+
+#define _mm512_maskz_getmant_pbh(A, B, C, D) \
+ (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_getmant_pbh(A, B, C) \
(__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
/* Intrinsics vfpclassbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm,
+ (__mmask32) -1);
+}
+
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
}
#else
+#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
+
+#define _mm512_fpclass_pbh_mask(X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
+
#define _mm256_mask_fpclass_pbh_mask(U, A, B) \
((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U)))
@@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
/* Intrinsics vcmpbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
+ (__mmask32) -1);
+}
+
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm)
}
#else
+#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
+
+#define _mm512_cmp_pbh_mask(A, B, C) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
+
#define _mm256_mask_cmp_pbh_mask(A, B, C, D) \
((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A)))
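
Illustrative usage sketch, not part of the patch: a masked multiply-add and a per-lane compare over packed bf16, composed from the new 512-bit intrinsics added above. The helper names and the -mavx10.2 flag are assumptions; only the intrinsic signatures shown in this diff are relied on.

  #include <immintrin.h>

  static __m512bh
  scale_add_bias (__m512bh x, __m512bh scale, __m512bh bias, __mmask32 keep)
  {
    /* r = x * scale + bias in the lanes selected by KEEP; all other
       lanes are zeroed by the maskz form.  */
    return _mm512_maskz_fmadd_pbh (keep, x, scale, bias);
  }

  static __mmask32
  greater_lanes (__m512bh a, __m512bh b)
  {
    /* Per-lane ordered, non-signaling greater-than comparison.  */
    return _mm512_cmp_pbh_mask (a, b, _CMP_GT_OQ);
  }
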
diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h
index 8cbdc66..f2fb98f 100644
--- a/gcc/config/i386/avx10_2convertintrin.h
+++ b/gcc/config/i386/avx10_2convertintrin.h
@@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ __R);
+}
+
+#else
+#define _mm512_cvtx_round2ps_ph(A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (-1), \
+ (R)))
+#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) (W), \
+ (__mmask32) (U), \
+ (R)))
+#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (U), \
+ (R)))
+#endif /* __OPTIMIZE__ */
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_bf8 (__m128i __A, __m128h __B)
@@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_bf8 (__m128i __A, __m128h __B)
@@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_hf8 (__m128i __A, __m128h __B)
@@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
+ __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_hf8 (__m128i __A, __m128h __B)
@@ -350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_bf8 (__m128h __A, __m128h __B)
@@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_bf8 (__m128h __A, __m128h __B)
@@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_hf8 (__m128h __A, __m128h __B)
@@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_hf8 (__m128h __A, __m128h __B)
@@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvthf8_ph (__m128i __A)
@@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A)
(__mmask16) __U);
}
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvthf8_ph (__m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_undefined_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_setzero_ph (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_bf8 (__m128h __A)
@@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_bf8 (__m128h __A)
@@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_hf8 (__m128h __A)
@@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_hf8 (__m128h __A)
@@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbf8_ph (__m128i __A)
@@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A)
(__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8));
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbf8_ph (__m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
+ (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
+}
+
#ifdef __DISABLE_AVX10_2__
#undef __DISABLE_AVX10_2__
#pragma GCC pop_options
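
Illustrative usage sketch, not part of the patch: a two-step narrowing pipeline built from the new 512-bit conversions added above. The helper name is hypothetical and AVX10.2 support is assumed to be enabled.

  #include <immintrin.h>

  static __m256i
  ps_to_bf8 (__m512 a, __m512 b)
  {
    /* Pack the 32 single-precision inputs into one vector of 32
       half-precision values, then narrow those to 32 bf8 bytes.  */
    __m512h h = _mm512_cvtx2ps_ph (a, b);
    return _mm512_cvtph_bf8 (h);
  }
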
diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h
index 0993e8e..7d30502 100644
--- a/gcc/config/i386/avx10_2mediaintrin.h
+++ b/gcc/config/i386/avx10_2mediaintrin.h
@@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U,
@@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B)
@@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (),
__U);
}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) __W,
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) _mm512_setzero_epi32 (),
+ __U);
+}
#else
#define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \
(__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \
@@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (), \
(__mmask16)(U))
+#define _mm512_mpsadbw_epu8(X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(M))
+
+#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U))
+
+#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi) _mm512_setzero_epi32 (), \
+ (__mmask32)(U))
#endif
#ifdef __DISABLE_AVX10_2__
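
Illustrative usage sketch, not part of the patch: signed 8-bit dot-product accumulation into sixteen 32-bit lanes with merge masking, using the 512-bit intrinsics added above. The helper name and the -mavx10.2 flag are assumptions.

  #include <immintrin.h>

  static __m512i
  dot_accumulate (__m512i acc, __mmask16 live, __m512i a_s8, __m512i b_s8)
  {
    /* Each 32-bit lane accumulates four signed-byte products; lanes not
       selected by LIVE keep their previous accumulator value.  */
    return _mm512_mask_dpbssd_epi32 (acc, live, a_s8, b_s8);
  }
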
diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h
index 0a4a253..f9fe14e 100644
--- a/gcc/config/i386/avx10_2minmaxintrin.h
+++ b/gcc/config/i386/avx10_2minmaxintrin.h
@@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A,
(__mmask16) __U);
}
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_pd (__m128d __A, __m128d __B, const int __C)
@@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ph (__m128h __A, __m128h __B, const int __C)
@@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ps (__m128 __A, __m128 __B, const int __C)
@@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_sd (__m128d __A, __m128d __B, const int __C)
@@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_si256 (), \
(__mmask16) (U)))
+#define _mm512_minmax_pbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_pbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
+
#define _mm_minmax_pd(A, B, C) \
((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
(__v2df) (B), \
@@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_pd (), \
(__mmask8) (U)))
+#define _mm512_minmax_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
#define _mm_minmax_ph(A, B, C) \
((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
(__v8hf) (B), \
@@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ph (), \
(__mmask16) (U)))
+#define _mm512_minmax_ph(A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ (int) (R)))
+
#define _mm_minmax_ps(A, B, C) \
((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
(__v4sf) (B), \
@@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ps (), \
(__mmask8) (U)))
+#define _mm512_minmax_ps(A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
#define _mm_minmax_round_sd(A, B, C, R) \
((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
(__v2df) (B), \
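A short sketch (not from the patch) exercising the new 512-bit MINMAX intrinsics introduced in this file, assuming AVX10.2 is enabled; the immediate 0 is an arbitrary, illustrative selector for the operation encoded by the instruction, and the helper name is hypothetical.

#include <immintrin.h>

/* Hypothetical helper: full-width and zero-masked forms of the new
   _mm512_minmax_pd.  A clear mask bit zeroes the corresponding lane.  */
static __m512d
minmax_example (__m512d a, __m512d b)
{
  __m512d full = _mm512_minmax_pd (a, b, 0);
  __m512d low4 = _mm512_maskz_minmax_pd ((__mmask8) 0x0F, a, b, 0);
  return _mm512_add_pd (full, low4);
}
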
diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h
index 78bcd72..c4fa19b 100644
--- a/gcc/config/i386/avx10_2satcvtintrin.h
+++ b/gcc/config/i386/avx10_2satcvtintrin.h
@@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A)
(__mmask8) __U);
}
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ipcvts_bf16_epi8 (__m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_undefined_si256 (),
- (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
-{
- return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_setzero_si256 (),
- (__mmask16) __U);
-}
-
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvts_bf16_epu8 (__m128bh __A)
@@ -127,6 +96,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ipcvts_bf16_epi8 (__m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
+{
+ return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ipcvts_bf16_epu8 (__m256bh __A)
{
return
@@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epi8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epu8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epi8 (__m128 __A)
+_mm512_ipcvts_bf16_epi8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epu8 (__m128 __A)
+_mm512_ipcvts_bf16_epu8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
extern __inline __m128i
@@ -390,6 +336,183 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epi8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epu8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epi8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epu8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvtts_ph_epi8 (__m128h __A)
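A brief sketch (not from the patch) of the 512-bit saturating integer conversions this file gains, assuming an AVX10.2-enabled compiler; the helper name and mask arguments are hypothetical.

#include <immintrin.h>

/* ipcvts_* converts with saturation, ipcvtts_* with truncation and
   saturation; the mask/maskz forms merge into W or zero inactive lanes.  */
static __m512i
convert_example (__m512h h, __m512 s, __mmask32 mh, __mmask16 ms)
{
  __m512i a = _mm512_ipcvts_ph_epi8 (h);
  __m512i b = _mm512_maskz_ipcvtts_ps_epi8 (ms, s);
  __m512i c = _mm512_mask_ipcvts_ph_epu8 (a, mh, h);
  return _mm512_xor_si512 (b, c);
}
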
@@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A)
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epi8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epu8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epi8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epu8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epi8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epu8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epi8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epu8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_ipcvts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+#endif
+
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtts_sd_epi32 (__m128d __A)
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 6740109..6c087e6 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
-#if !defined (__AVX512BF16__) || defined (__EVEX512__)
+#if !defined (__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,no-evex512")
+#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
@@ -42,17 +42,6 @@ _mm_cvtsbh_ss (__bf16 __A)
return __builtin_ia32_cvtbf2sf (__A);
}
-#ifdef __DISABLE_AVX512BF16__
-#undef __DISABLE_AVX512BF16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BF16__ */
-
-#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bf16,evex512")
-#define __DISABLE_AVX512BF16_512__
-#endif /* __AVX512BF16_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
@@ -155,8 +144,8 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
}
-#ifdef __DISABLE_AVX512BF16_512__
-#undef __DISABLE_AVX512BF16_512__
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16_512__ */
diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h
index ffaceac..fd6d183 100644
--- a/gcc/config/i386/avx512bf16vlintrin.h
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,avx512vl,no-evex512")
+#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* __AVX512BF16__ */
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
index 301f125..d7156f9 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
#define _AVX512BITALGINTRIN_H_INCLUDED
-#if !defined (__AVX512BITALG__) || !defined (__EVEX512__)
+#if !defined (__AVX512BITALG__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,evex512")
+#pragma GCC target("avx512bitalg")
#define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */
diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
index e4883cf..cf9cff6 100644
--- a/gcc/config/i386/avx512bitalgvlintrin.h
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED
#define _AVX512BITALGVLINTRIN_H_INCLUDED
-#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || defined (__EVEX512__)
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512vl,no-evex512")
+#pragma GCC target("avx512bitalg,avx512vl")
#define __DISABLE_AVX512BITALGVL__
#endif /* __AVX512BITALGVL__ */
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 47c4c03..5e9eeaa 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BWINTRIN_H_INCLUDED
#define _AVX512BWINTRIN_H_INCLUDED
-#if !defined (__AVX512BW__) || defined (__EVEX512__)
+#if !defined (__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512bw,no-evex512")
+#pragma GCC target("avx512bw")
#define __DISABLE_AVX512BW__
#endif /* __AVX512BW__ */
@@ -346,17 +346,6 @@ _kandn_mask64 (__mmask64 __A, __mmask64 __B)
return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B);
}
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
-
-#if !defined (__AVX512BW__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bw,evex512")
-#define __DISABLE_AVX512BW_512__
-#endif /* __AVX512BW_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
@@ -3369,8 +3358,8 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
#endif
-#ifdef __DISABLE_AVX512BW_512__
-#undef __DISABLE_AVX512BW_512__
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BW_512__ */
diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h
index 206cc49..5a92d25 100644
--- a/gcc/config/i386/avx512cdintrin.h
+++ b/gcc/config/i386/avx512cdintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX512CD__
#pragma GCC push_options
-#pragma GCC target("avx512cd,evex512")
+#pragma GCC target("avx512cd")
#define __DISABLE_AVX512CD__
#endif /* __AVX512CD__ */
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 1d10225..a7766b5 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512DQINTRIN_H_INCLUDED
#define _AVX512DQINTRIN_H_INCLUDED
-#if !defined (__AVX512DQ__) || defined (__EVEX512__)
+#if !defined (__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512dq,no-evex512")
+#pragma GCC target("avx512dq")
#define __DISABLE_AVX512DQ__
#endif /* __AVX512DQ__ */
@@ -639,17 +639,6 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ__
-#undef __DISABLE_AVX512DQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ__ */
-
-#if !defined (__AVX512DQ__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512dq,evex512")
-#define __DISABLE_AVX512DQ_512__
-#endif /* __AVX512DQ_512__ */
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x2 (__m128d __A)
@@ -2897,9 +2886,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ_512__
-#undef __DISABLE_AVX512DQ_512__
+#ifdef __DISABLE_AVX512DQ__
+#undef __DISABLE_AVX512DQ__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ_512__ */
+#endif /* __DISABLE_AVX512DQ__ */
#endif /* _AVX512DQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 9160787..4469f73 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED
-#if !defined (__AVX512F__) || defined (__EVEX512__)
+#if !defined (__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("avx512f,no-evex512")
+#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
@@ -54,11 +54,12 @@ typedef enum
_MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
-/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
- from AVX2 or before. We need to add them to prevent target option mismatch
- when calling AVX512 intrins implemented with these intrins under no-evex512
- function attribute. All AVX512 intrins calling those AVX2 intrins or
- before will change their calls to these AVX512 version. */
+/* These _mm{,256}_avx512* intrins are initially duplicated from their
+ _mm{,256}_* forms from AVX2 or before. At that time, we needed to add them
+ to prevent target option mismatch when calling AVX512 intrins implemented
+ with these intrins under no-evex512 function attribute. These intrins will
+ still be here to avoid huge changes. All AVX512 intrins calling those AVX2
+ intrins or before have changed their calls to these AVX512 versions. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_undefined_ps (void)
{
@@ -3802,17 +3803,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
#endif
-#ifdef __DISABLE_AVX512F__
-#undef __DISABLE_AVX512F__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F__ */
-
-#if !defined (__AVX512F__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512f,evex512")
-#define __DISABLE_AVX512F_512__
-#endif /* __AVX512F_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
@@ -16609,9 +16599,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
#undef __MM512_REDUCE_OP
-#ifdef __DISABLE_AVX512F_512__
-#undef __DISABLE_AVX512F_512__
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F_512__ */
+#endif /* __DISABLE_AVX512F__ */
#endif /* _AVX512FINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index f158f87..471ec05 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FP16INTRIN_H_INCLUDED
#define _AVX512FP16INTRIN_H_INCLUDED
-#if !defined (__AVX512FP16__) || defined (__EVEX512__)
+#if !defined (__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,no-evex512")
+#pragma GCC target("avx512fp16")
#define __DISABLE_AVX512FP16__
#endif /* __AVX512FP16__ */
@@ -2852,17 +2852,6 @@ _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
#define _mm_maskz_cmul_round_sch(U, A, B, R) \
_mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16__
-#undef __DISABLE_AVX512FP16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16__ */
-
-#if !defined (__AVX512FP16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512fp16,evex512")
-#define __DISABLE_AVX512FP16_512__
-#endif /* __AVX512FP16_512__ */
-
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
@@ -7238,9 +7227,9 @@ _mm512_set1_pch (_Float16 _Complex __A)
#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
_mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16_512__
-#undef __DISABLE_AVX512FP16_512__
+#ifdef __DISABLE_AVX512FP16__
+#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16_512__ */
+#endif /* __DISABLE_AVX512FP16__ */
#endif /* _AVX512FP16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 59e6c88..cb98310 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,avx512vl,no-evex512")
+#pragma GCC target("avx512fp16,avx512vl")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h
index ed97350..56790c0 100644
--- a/gcc/config/i386/avx512ifmaintrin.h
+++ b/gcc/config/i386/avx512ifmaintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED
-#if !defined (__AVX512IFMA__) || !defined (__EVEX512__)
+#if !defined (__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,evex512")
+#pragma GCC target("avx512ifma")
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */
diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h
index 681bda3..6b849c8 100644
--- a/gcc/config/i386/avx512ifmavlintrin.h
+++ b/gcc/config/i386/avx512ifmavlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
#define _AVX512IFMAVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,avx512vl,no-evex512")
+#pragma GCC target("avx512ifma,avx512vl")
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */
diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index f5515a8..e8bfe1d 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
#define __AVX512VBMI2INTRIN_H_INCLUDED
-#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__)
+#if !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,evex512")
+#pragma GCC target("avx512vbmi2")
#define __DISABLE_AVX512VBMI2__
#endif /* __AVX512VBMI2__ */
diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h
index e9857ba..5cdfebd 100644
--- a/gcc/config/i386/avx512vbmi2vlintrin.h
+++ b/gcc/config/i386/avx512vbmi2vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi2,avx512vl")
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h
index 901a2f7..5f5e342 100644
--- a/gcc/config/i386/avx512vbmiintrin.h
+++ b/gcc/config/i386/avx512vbmiintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED
-#if !defined (__AVX512VBMI__) || !defined (__EVEX512__)
+#if !defined (__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,evex512")
+#pragma GCC target("avx512vbmi")
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h
index 90cd590..037ea93 100644
--- a/gcc/config/i386/avx512vbmivlintrin.h
+++ b/gcc/config/i386/avx512vbmivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 9f0a5b4..537e408 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLBWINTRIN_H_INCLUDED
#define _AVX512VLBWINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512bw,no-evex512")
+#pragma GCC target("avx512vl,avx512bw")
#define __DISABLE_AVX512VLBW__
#endif /* __AVX512VLBW__ */
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
index 3b23d4a..5783dbe 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLDQINTRIN_H_INCLUDED
#define _AVX512VLDQINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq,no-evex512")
+#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* __AVX512VLDQ__ */
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 4451a1f..50930cd 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED
-#if !defined (__AVX512VL__) || defined (__EVEX512__)
+#if !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,no-evex512")
+#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */
@@ -13650,7 +13650,7 @@ _mm256_permutex_pd (__m256d __X, const int __M)
#if !defined (__AVX512CD__) || !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512cd,no-evex512")
+#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif
diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h
index 5d0eaff..fe7b663 100644
--- a/gcc/config/i386/avx512vnniintrin.h
+++ b/gcc/config/i386/avx512vnniintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED
-#if !defined(__AVX512VNNI__) || !defined (__EVEX512__)
+#if !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,evex512")
+#pragma GCC target("avx512vnni")
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h
index 7774bbd..01c3c91 100644
--- a/gcc/config/i386/avx512vnnivlintrin.h
+++ b/gcc/config/i386/avx512vnnivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
#define _AVX512VNNIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,avx512vl,no-evex512")
+#pragma GCC target("avx512vnni,avx512vl")
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index e170cf5..50f7ead 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,evex512")
+#pragma GCC target("avx512vp2intersect")
#define __DISABLE_AVX512VP2INTERSECT__
#endif /* __AVX512VP2INTERSECT__ */
diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h
index afdd2da..3e0a8ab 100644
--- a/gcc/config/i386/avx512vp2intersectvlintrin.h
+++ b/gcc/config/i386/avx512vp2intersectvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,avx512vl,no-evex512")
+#pragma GCC target("avx512vp2intersect,avx512vl")
#define __DISABLE_AVX512VP2INTERSECTVL__
#endif /* __AVX512VP2INTERSECTVL__ */
diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h
index 3357255..e4b89ea 100644
--- a/gcc/config/i386/avx512vpopcntdqintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
-#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__)
+#if !defined (__AVX512VPOPCNTDQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,evex512")
+#pragma GCC target("avx512vpopcntdq")
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
index 17d836f..8eb1d42 100644
--- a/gcc/config/i386/avx512vpopcntdqvlintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,avx512vl,no-evex512")
+#pragma GCC target("avx512vpopcntdq,avx512vl")
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 3ddcbec..0a3173c 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_SEH
#define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables)
+#undef PREFERRED_STACK_BOUNDARY_DEFAULT
+#define PREFERRED_STACK_BOUNDARY_DEFAULT \
+ (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY)
+
/* Win64 with SEH cannot represent DRAP stack frames. Disable its use.
Force the use of different mechanisms to allocate aligned local data. */
#undef MAX_STACK_ALIGNMENT
#define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT)
-/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions
- may require 16-byte alignment. */
-#undef STACK_REALIGN_DEFAULT
-#define STACK_REALIGN_DEFAULT TARGET_SSE
-
/* Support hooks for SEH. */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit
@@ -247,9 +246,10 @@ do { \
#undef ASM_OUTPUT_LABELREF
#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
do { \
+ const char *prefix = ""; \
if ((NAME)[0] != FASTCALL_PREFIX) \
- fputs (user_label_prefix, (STREAM)); \
- fputs ((NAME), (STREAM)); \
+ prefix = user_label_prefix; \
+ ix86_asm_output_labelref ((STREAM), prefix, (NAME)); \
} while (0)
/* This does much the same in memory rather than to a stream. */
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 1ff05e5..fe71f55 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -374,33 +374,6 @@ detect_caches_intel (bool xeon_mp, unsigned max_level,
#define has_feature(f) \
has_cpu_feature (&cpu_model, cpu_features2, f)
-/* We will emit a warning when using AVX10.1 and AVX512 options with one
- enabled and the other disabled. Add this function to avoid push "-mno-"
- options under this scenario for -march=native. */
-
-bool check_avx512_features (__processor_model &cpu_model,
- unsigned int (&cpu_features2)[SIZE_OF_CPU_FEATURES],
- const enum processor_features feature)
-{
- if (has_feature (FEATURE_AVX10_1_256)
- && ((feature == FEATURE_AVX512F)
- || (feature == FEATURE_AVX512CD)
- || (feature == FEATURE_AVX512DQ)
- || (feature == FEATURE_AVX512BW)
- || (feature == FEATURE_AVX512VL)
- || (feature == FEATURE_AVX512IFMA)
- || (feature == FEATURE_AVX512VBMI)
- || (feature == FEATURE_AVX512VBMI2)
- || (feature == FEATURE_AVX512VNNI)
- || (feature == FEATURE_AVX512VPOPCNTDQ)
- || (feature == FEATURE_AVX512BITALG)
- || (feature == FEATURE_AVX512FP16)
- || (feature == FEATURE_AVX512BF16)))
- return false;
-
- return true;
-}
-
/* This will be called by the spec parser in gcc.cc when it sees
a %:local_cpu_detect(args) construct. Currently it will be
called with either "arch [32|64]" or "tune [32|64]" as argument
@@ -627,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
if (has_feature (FEATURE_AVX512F))
{
/* Assume Diamond Rapids. */
- if (has_feature (FEATURE_AMX_TRANSPOSE))
+ if (has_feature (FEATURE_AMX_FP8))
cpu = "diamondrapids";
/* Assume Granite Rapids D. */
else if (has_feature (FEATURE_AMX_COMPLEX))
@@ -909,12 +882,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, " ",
isa_names_table[i].option, NULL);
}
- /* Never push -mno-avx10.1-{256,512} under -march=native to
- avoid unnecessary warnings when building libraries. */
- else if (isa_names_table[i].feature != FEATURE_AVX10_1_256
- && isa_names_table[i].feature != FEATURE_AVX10_1
- && check_avx512_features (cpu_model, cpu_features2,
- isa_names_table[i].feature))
+ else
options = concat (options, neg_option,
isa_names_table[i].option + 2, NULL);
}
diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile
index 528b34e..0e9e5fe 100755
--- a/gcc/config/i386/gcc-auto-profile
+++ b/gcc/config/i386/gcc-auto-profile
@@ -24,8 +24,16 @@ if [ "$1" = "--all" ] ; then
shift
fi
-if ! grep -q Intel /proc/cpuinfo ; then
- echo >&2 "Only Intel CPUs supported"
+if grep -q AuthenticAMD /proc/cpuinfo ; then
+ vendor=AMD
+ if ! grep -q " brs" /proc/cpuinfo && ! grep -q amd_lbr_v2 /proc/cpuinfo ; then
+ echo >&2 "AMD CPU with brs (Zen 3) or amd_lbr_v2 (Zen 4+) feature is required"
+ exit 1
+ fi
+elif grep -q Intel /proc/cpuinfo ; then
+ vendor=Intel
+else
+ echo >&2 "Only AMD and Intel CPUs supported"
exit 1
fi
@@ -33,7 +41,7 @@ if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi
-case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
+case `test $vendor = Intel && grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
grep -E "^model\s*:" /proc/cpuinfo | head -n1` in
model*:\ 46|\
model*:\ 30|\
@@ -82,6 +90,8 @@ model*:\ 126|\
model*:\ 167|\
model*:\ 140|\
model*:\ 141|\
+model*:\ 143|\
+model*:\ 207|\
model*:\ 106|\
model*:\ 108|\
model*:\ 173|\
@@ -89,15 +99,20 @@ model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;;
model*:\ 134|\
model*:\ 150|\
model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;;
-model*:\ 143|\
-model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;;
-model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
+model*:\ 190|\
+model*:\ 175|\
+model*:\ 182) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;;
*)
if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; then
E=br_inst_retired.near_taken:p
+ elif perf list ex_ret_brn_tkn | grep -q ex_ret_brn_tkn ; then
+ E=ex_ret_brn_tkn:P$FLAGS
+ elif test $vendor = Intel ; then
+echo >&2 "Unknown Intel CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+ exit 1
else
-echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+echo >&2 "AMD CPU without support for ex_ret_brn_tkn event"
exit 1
fi ;;
esac
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index c7e21e6..bc433c2 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -297,9 +297,9 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__)
+#if !defined(__GFNI__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512f,evex512")
+#pragma GCC target("gfni,avx512f")
#define __DISABLE_GFNIAVX512F__
#endif /* __GFNIAVX512F__ */
@@ -341,9 +341,9 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
#pragma GCC pop_options
#endif /* __GFNIAVX512F__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__)
+#if !defined(__GFNI__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512bw,evex512")
+#pragma GCC target("gfni,avx512bw")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */
diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index e083f49..87804a5 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
and earlier, backslashes are invalid in object name. So, we need
to check if we are on Windows2000 or higher. */
OSVERSIONINFO version_info;
- int r;
version_info.dwOSVersionInfoSize = sizeof (version_info);
@@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
return -1;
}
- /* Retry five times, as here might occure a race with multiple gcc's
- instances at same time. */
- for (r = 0; r < 5; r++)
- {
- mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
- size, addr);
- if (mmap_addr == addr)
- break;
- if (r != 4)
- Sleep (500);
- }
-
- if (mmap_addr != addr)
+ /* Try mapping the file at `addr`. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, addr);
+ if (mmap_addr == NULL)
{
- w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
- CloseHandle(mmap_handle);
- return -1;
+ /* We could not map the file at its original address, so let the
+ system choose a different one. The PCH can be relocated later. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, NULL);
+ if (mmap_addr == NULL)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return -1;
+ }
}
+ addr = mmap_addr;
return 1;
}
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a142711..fe42c6436 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -204,53 +204,53 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstored256, "__builtin_ia32_mas
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI)
@@ -297,14 +297,14 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si,
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT)
/* AVX512BW */
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
/* AVX512VP2INTERSECT */
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI)
@@ -411,9 +411,9 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
/* AVX512FP16 */
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
@@ -434,17 +434,17 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
@@ -1384,230 +1384,230 @@ BDESC (OPTION_MASK_ISA_BMI2, 0, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si"
BDESC (OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
/* Mask arithmetic operations */
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
@@ -2433,136 +2433,136 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
/* AVX512BW. */
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
/* AVX512IFMA */
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2577,13 +2577,13 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUINTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
/* AVX512VBMI */
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
@@ -2594,16 +2594,16 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
@@ -2612,64 +2612,64 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi_mask, "__builtin_ia32_vpshld_v8hi_mask", IX86_BUILTIN_VPSHLDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si, "__builtin_ia32_vpshld_v8si", IX86_BUILTIN_VPSHLDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si_mask, "__builtin_ia32_vpshld_v8si_mask", IX86_BUILTIN_VPSHLDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si, "__builtin_ia32_vpshld_v4si", IX86_BUILTIN_VPSHLDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si_mask, "__builtin_ia32_vpshld_v4si_mask", IX86_BUILTIN_VPSHLDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di, "__builtin_ia32_vpshld_v4di", IX86_BUILTIN_VPSHLDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di_mask, "__builtin_ia32_vpshld_v4di_mask", IX86_BUILTIN_VPSHLDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di, "__builtin_ia32_vpshld_v2di", IX86_BUILTIN_VPSHLDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2677,27 +2677,27 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2706,20 +2706,20 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
/* GFNI */
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineinvqb_v32qi, "__builtin_ia32_vgf2p8affineinvqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v32qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineinvqb_v16qi, "__builtin_ia32_vgf2p8affineinvqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineinvqb_v16qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineqb_v32qi, "__builtin_ia32_vgf2p8affineqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v32qi_mask, "__builtin_ia32_vgf2p8affineqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineqb_v16qi, "__builtin_ia32_vgf2p8affineqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineqb_v16qi_mask, "__builtin_ia32_vgf2p8affineqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8mulb_v32qi, "__builtin_ia32_vgf2p8mulb_v32qi", IX86_BUILTIN_VGF2P8MULB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v32qi_mask, "__builtin_ia32_vgf2p8mulb_v32qi_mask", IX86_BUILTIN_VGF2P8MULB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
@@ -2727,9 +2727,9 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v
/* AVX512_VNNI */
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2737,9 +2737,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2747,9 +2747,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2757,9 +2757,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2798,13 +2798,13 @@ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16 | OPTION_MASK_ISA2_AVX10_2, CODE_FOR_vpd
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
-BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
/* VPOPCNTDQ */
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
@@ -2816,21 +2816,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
/* BITALG */
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI)
@@ -2840,39 +2840,39 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B
/* VAES. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
/* BF16 */
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf, "__builtin_ia32_cvtne2ps2bf16_v16bf", IX86_BUILTIN_CVTNE2PS2BF16_V16BF, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_mask, "__builtin_ia32_cvtne2ps2bf16_v16bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASK, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_V16BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v16bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf, "__builtin_ia32_cvtne2ps2bf16_v8bf", IX86_BUILTIN_CVTNE2PS2BF16_V8BF, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_mask, "__builtin_ia32_cvtne2ps2bf16_v8bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v8bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2BF16_V8SF, UNKNOWN, (int) V8BF_FTYPE_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V8SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V8SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2BF16_V4SF, UNKNOWN, (int) V8BF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V4SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V4SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPBF16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPBF16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPBF16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
@@ -2885,40 +2885,40 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_extendbfsf2_1, "__builtin_ia32_cvtbf2sf
/* AVX512FP16. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
@@ -2928,7 +2928,7 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
@@ -3366,26 +3366,26 @@ BDESC_END (ARGS, ROUND_ARGS)
/* AVX512F. */
BDESC_FIRST (round_args, ROUND_ARGS,
- OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+ OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT)
@@ -3393,72 +3393,72 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd_mask_round", IX86_BUILTIN_GETEXPSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss_mask_round", IX86_BUILTIN_GETEXPSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_mask_round, "__builtin_ia32_getmantsd_mask_round", IX86_BUILTIN_GETMANTSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_mask_round, "__builtin_ia32_getmantss_mask_round", IX86_BUILTIN_GETMANTSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
@@ -3479,12 +3479,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_sse_cvttss2si_round, "__built
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
@@ -3495,100 +3495,100 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
/* AVX512FP16. */
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
@@ -3601,32 +3601,32 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
@@ -3634,18 +3634,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fma_fcmaddcsh_v8hf_round, "__builtin_ia32_vfcmaddcsh_round", IX86_BUILTIN_VFCMADDCSH_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask1_round, "__builtin_ia32_vfcmaddcsh_mask_round", IX86_BUILTIN_VFCMADDCSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask3_round, "__builtin_ia32_vfcmaddcsh_mask3_round", IX86_BUILTIN_VFCMADDCSH_MASK3_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 2e7381b..4835b94 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -801,102 +801,102 @@ ix86_init_mmx_sse_builtins (void)
IX86_BUILTIN_GATHERALTDIV8SI);
/* AVX512F */
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16sf",
V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16sf",
V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16si",
V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16si",
V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8df ",
V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16sf ",
V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8di ",
V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16si ",
V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16sf",
VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
IX86_BUILTIN_SCATTERSIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8df",
VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
IX86_BUILTIN_SCATTERSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16sf",
VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
IX86_BUILTIN_SCATTERDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8df",
VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
IX86_BUILTIN_SCATTERDIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16si",
VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
IX86_BUILTIN_SCATTERSIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8di",
VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
IX86_BUILTIN_SCATTERSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16si",
VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
IX86_BUILTIN_SCATTERDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8di",
VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
IX86_BUILTIN_SCATTERDIV8DI);
@@ -1046,22 +1046,22 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
IX86_BUILTIN_SCATTERDIV2DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8df ",
VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
IX86_BUILTIN_SCATTERALTSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16sf ",
VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
IX86_BUILTIN_SCATTERALTDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8di ",
VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
IX86_BUILTIN_SCATTERALTSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16si ",
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
IX86_BUILTIN_SCATTERALTDIV16SI);
@@ -1676,7 +1676,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
enum ix86_builtins code;
const machine_mode mode = TYPE_MODE (TREE_TYPE (mem_vectype));
- if ((!TARGET_AVX512F || !TARGET_EVEX512) && GET_MODE_SIZE (mode) == 64)
+ if (!TARGET_AVX512F && GET_MODE_SIZE (mode) == 64)
return NULL_TREE;
if (! TARGET_AVX2
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 0a320ca..457aa05 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -729,12 +729,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SHA512__");
if (isa_flag2 & OPTION_MASK_ISA2_SM4)
def_or_undef (parse_in, "__SM4__");
- if (isa_flag2 & OPTION_MASK_ISA2_EVEX512)
- def_or_undef (parse_in, "__EVEX512__");
if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR)
def_or_undef (parse_in, "__USER_MSR__");
- if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256)
- def_or_undef (parse_in, "__AVX10_1_256__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1)
def_or_undef (parse_in, "__AVX10_1__");
if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..83076ad 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
too common scenario. */
start_sequence ();
compare_op = ix86_expand_fp_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
@@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[])
start_sequence ();
compare_op = ix86_expand_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
compare_code = GET_CODE (compare_op);
@@ -3611,7 +3609,11 @@ ix86_expand_int_movcc (rtx operands[])
negate_cc_compare_p = true;
}
- diff = ct - cf;
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
/* Sign bit compares are better done using shifts than we do by using
sbb. */
if (sign_bit_compare_p
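
The representability guard used in this and the following hunks computes
ct - cf in unsigned arithmetic and then checks that the wrapped result has
the sign the exact difference must have.  A minimal standalone sketch
(hypothetical helper, HOST_WIDE_INT modelled as int64_t; not GCC code):

  #include <stdbool.h>
  #include <stdint.h>

  typedef int64_t hwi;	/* stand-in for HOST_WIDE_INT */

  /* Return true when ct - cf fits in a signed hwi.  The subtraction is
     done in unsigned arithmetic to avoid undefined overflow, then the
     sign of the wrapped result is compared with the sign the exact
     difference must have.  */
  static bool
  diff_representable_p (hwi ct, hwi cf)
  {
    hwi diff = (uint64_t) ct - cf;
    bool expect_positive = ((cf < 0) != (ct < 0)) ? cf < 0 : cf < ct;
    return (diff > 0) == expect_positive;
  }

  /* diff_representable_p (INT64_MAX, -1) is false: the exact difference
     needs 65 bits, so the caller gives up rather than use a wrapped
     constant.  */
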
@@ -3669,7 +3671,12 @@ ix86_expand_int_movcc (rtx operands[])
PUT_CODE (compare_op,
reverse_condition (GET_CODE (compare_op)));
}
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
if (reg_overlap_mentioned_p (out, compare_op))
tmp = gen_reg_rtx (mode);
@@ -3687,7 +3694,12 @@ ix86_expand_int_movcc (rtx operands[])
else
{
std::swap (ct, cf);
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
}
tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
}
@@ -3754,9 +3766,15 @@ ix86_expand_int_movcc (rtx operands[])
tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
tmp = expand_simple_binop (mode, AND,
copy_rtx (tmp),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (tmp), 1, OPTAB_DIRECT);
if (ct)
tmp = expand_simple_binop (mode, PLUS,
@@ -3793,7 +3811,13 @@ ix86_expand_int_movcc (rtx operands[])
if (new_code != UNKNOWN)
{
std::swap (ct, cf);
- diff = -diff;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
code = new_code;
}
}
@@ -3996,8 +4020,14 @@ ix86_expand_int_movcc (rtx operands[])
copy_rtx (out), 1, OPTAB_DIRECT);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
out = expand_simple_binop (mode, AND, copy_rtx (out),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (out), 1, OPTAB_DIRECT);
if (ct)
out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
@@ -4138,6 +4168,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return false;
mode = GET_MODE (dest);
+ if (immediate_operand (if_false, mode))
+ if_false = force_reg (mode, if_false);
+ if (immediate_operand (if_true, mode))
+ if_true = force_reg (mode, if_true);
/* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
but MODE may be a vector mode and thus not appropriate. */
@@ -4186,7 +4220,7 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW)
return false;
- return (vector_size == 64 && TARGET_EVEX512) || TARGET_AVX512VL;
+ return vector_size == 64 || TARGET_AVX512VL;
}
/* Return true if integer mask comparison should be used. */
@@ -4687,6 +4721,8 @@ ix86_expand_fp_movcc (rtx operands[])
compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
+ operands[2] = force_reg (mode, operands[2]);
+ operands[3] = force_reg (mode, operands[3]);
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
@@ -5022,7 +5058,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
/* Don't do it if not using integer masks and we'd end up with
the right values in the registers though. */
- && ((GET_MODE_SIZE (mode) == 64 && TARGET_EVEX512)
+ && (GET_MODE_SIZE (mode) == 64
|| !vector_all_ones_operand (optrue, data_mode)
|| opfalse != CONST0_RTX (data_mode))))
{
@@ -8901,31 +8937,34 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
+alg_usable_p (enum stringop_alg alg, bool memset,
+ addr_space_t dst_as, addr_space_t src_as)
{
if (alg == no_stringop)
return false;
/* It is not possible to use a library call if we have non-default
address space. We can do better than the generic byte-at-a-time
loop, used as a fallback. */
- if (alg == libcall && have_as)
+  if (alg == libcall
+      && !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as)))
return false;
if (alg == vector_loop)
return TARGET_SSE || TARGET_AVX;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
- registers for their own purposes, or if we have a non-default
- address space, since some string insns cannot override the segment. */
+ registers for their own purposes, or if we have the destination
+ in the non-default address space, since string insns cannot
+ override the destination segment. */
if (alg == rep_prefix_1_byte
|| alg == rep_prefix_4_byte
|| alg == rep_prefix_8_byte)
{
- if (have_as)
- return false;
if (fixed_regs[CX_REG]
|| fixed_regs[DI_REG]
- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
+ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])
+ || !ADDR_SPACE_GENERIC_P (dst_as)
+ || !(ADDR_SPACE_GENERIC_P (src_as) || Pmode == word_mode))
return false;
}
return true;
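
Restated as a standalone predicate (illustrative only, the names are not
GCC's), the rep-prefix usability rule after this change is: the required
hard registers must be free, the destination must be in the generic address
space, and a non-generic source address space is tolerated only when Pmode
is word_mode.

  static bool
  rep_prefix_usable_p (bool memset_p,
		       bool dst_generic_as, bool src_generic_as,
		       bool pmode_is_word_mode,
		       bool cx_fixed, bool di_fixed,
		       bool ax_fixed, bool si_fixed)
  {
    if (cx_fixed || di_fixed || (memset_p ? ax_fixed : si_fixed))
      return false;	/* needed hard registers are user-reserved */
    if (!dst_generic_as)
      return false;	/* string insns cannot override the destination segment */
    if (!src_generic_as && !pmode_is_word_mode)
      return false;	/* non-generic source only with full-width pointers */
    return true;
  }
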
@@ -8935,8 +8974,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, bool have_as,
- int *dynamic_check, bool *noalign, bool recur)
+ bool memset, bool zero_memset, addr_space_t dst_as,
+ addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur)
{
const struct stringop_algs *algs;
bool optimize_for_speed;
@@ -8968,7 +9007,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
{
enum stringop_alg candidate = algs->size[i].alg;
- bool usable = alg_usable_p (candidate, memset, have_as);
+ bool usable = alg_usable_p (candidate, memset, dst_as, src_as);
any_alg_usable_p |= usable;
if (candidate != libcall && candidate && usable)
@@ -8984,17 +9023,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If user specified the algorithm, honor it if possible. */
if (ix86_stringop_alg != no_stringop
- && alg_usable_p (ix86_stringop_alg, memset, have_as))
+ && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as))
return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (!optimize_for_speed)
{
*noalign = true;
if (!count || (count & 3) || (memset && !zero_memset))
- return alg_usable_p (rep_prefix_1_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as)
? rep_prefix_1_byte : loop_1_byte;
else
- return alg_usable_p (rep_prefix_4_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as)
? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to
@@ -9018,7 +9057,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
enum stringop_alg candidate = algs->size[i].alg;
if (candidate != libcall
- && alg_usable_p (candidate, memset, have_as))
+ && alg_usable_p (candidate, memset, dst_as, src_as))
{
alg = candidate;
alg_noalign = algs->size[i].noalign;
@@ -9038,7 +9077,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
else if (!any_alg_usable_p)
break;
}
- else if (alg_usable_p (candidate, memset, have_as)
+ else if (alg_usable_p (candidate, memset, dst_as, src_as)
&& !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB
&& candidate == rep_prefix_1_byte
/* NB: If min_size != max_size, size is
@@ -9060,7 +9099,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
&& (algs->unknown_size == libcall
- || !alg_usable_p (algs->unknown_size, memset, have_as)))
+ || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as)))
{
enum stringop_alg alg;
HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
@@ -9075,8 +9114,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*dynamic_check = 128;
return loop_1_byte;
}
- alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
- zero_memset, have_as, dynamic_check, noalign, true);
+ alg = decide_alg (count, new_expected_size, min_size, max_size,
+ memset, zero_memset, dst_as, src_as,
+ dynamic_check, noalign, true);
gcc_assert (*dynamic_check == -1);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
@@ -9088,7 +9128,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* Try to use some reasonable fallback algorithm. Note that for
non-default address spaces we default to a loop instead of
a libcall. */
- return (alg_usable_p (algs->unknown_size, memset, have_as)
+
+ bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as)
+ && ADDR_SPACE_GENERIC_P (src_as));
+
+ return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as)
? algs->unknown_size : have_as ? loop : libcall);
}
@@ -9307,14 +9351,13 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
bool need_zero_guard = false;
bool noalign;
machine_mode move_mode = VOIDmode;
- machine_mode wider_mode;
int unroll_factor = 1;
/* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
bool misaligned_prologue_used = false;
- bool have_as;
+ addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
@@ -9352,16 +9395,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
if (count > (HOST_WIDE_INT_1U << 30))
return false;
- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
+ dst_as = MEM_ADDR_SPACE (dst);
if (!issetmem)
- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
+ src_as = MEM_ADDR_SPACE (src);
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
alg = decide_alg (count, expected_size, min_size, probable_max_size,
- issetmem,
- issetmem && val_exp == const0_rtx, have_as,
- &dynamic_check, &noalign, false);
+ issetmem, issetmem && val_exp == const0_rtx,
+ dst_as, src_as, &dynamic_check, &noalign, false);
if (dump_file)
fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
@@ -9384,6 +9426,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
unroll_factor = 1;
move_mode = word_mode;
+ int nunits;
switch (alg)
{
case libcall:
@@ -9404,27 +9447,14 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
case vector_loop:
need_zero_guard = true;
unroll_factor = 4;
- /* Find the widest supported mode. */
- move_mode = word_mode;
- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
- move_mode = wider_mode;
-
- if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
- move_mode = TImode;
- if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
- move_mode = OImode;
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
+ /* Get the vector mode to move MOVE_MAX bytes. */
+ nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+ if (nunits > 1)
{
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
- move_mode = word_mode;
+ move_mode = mode_for_vector (word_mode, nunits).require ();
+ gcc_assert (optab_handler (mov_optab, move_mode)
+ != CODE_FOR_nothing);
}
- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
break;
case rep_prefix_8_byte:
move_mode = DImode;
@@ -10108,9 +10138,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
if (lookup_attribute ("interrupt",
TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
error ("interrupt service routine cannot be called directly");
- else if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ else if (ix86_type_no_callee_saved_registers_p (TREE_TYPE (fndecl)))
call_no_callee_saved_registers = true;
+ if (fndecl == current_function_decl
+ && decl_binds_to_current_def_p (fndecl))
+ cfun->machine->recursive_function = true;
}
}
else
@@ -10120,8 +10152,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
tree mem_expr = MEM_EXPR (fnaddr);
if (mem_expr != nullptr
&& TREE_CODE (mem_expr) == MEM_REF
- && lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (mem_expr))))
+ && ix86_type_no_callee_saved_registers_p (TREE_TYPE (mem_expr)))
call_no_callee_saved_registers = true;
}
@@ -10346,6 +10377,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (!fixed_regs[i]
+ && i != HARD_FRAME_POINTER_REGNUM
&& !(ix86_call_used_regs[i] == 1
|| (ix86_call_used_regs[i] & c_mask))
&& !STACK_REGNO_P (i)
@@ -11244,6 +11276,54 @@ fixup_modeless_constant (rtx x, machine_mode mode)
return x;
}
+/* Expand the outgoing argument ARG to extract unsigned char and short
+ integer constants suitable for the predicates and the instruction
+ templates which expect the unsigned expanded value. */
+
+static rtx
+ix86_expand_unsigned_small_int_cst_argument (tree arg)
+{
+ /* When passing 0xff as an unsigned char function argument with the
+ C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe6aa23a8 type <integer_type 0x7fffe98225e8 int> constant 255>
+
+ and returns the rtx value using the sign-extended representation:
+
+ (const_int 255 [0xff])
+
+ Without the C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe9824018 type <integer_type 0x7fffe9822348 unsigned char > constant 255>
+
+ and returns
+
+ (const_int -1 [0xffffffffffffffff])
+
+ which doesn't work with the predicates nor the instruction templates
+ which expect the unsigned expanded value. Extract the unsigned char
+ and short integer constants to return
+
+ (const_int 255 [0xff])
+
+ so that the expanded value is always unsigned, without the C frontend
+ promotion. */
+
+ if (TREE_CODE (arg) == INTEGER_CST)
+ {
+ tree type = TREE_TYPE (arg);
+ if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (type)
+ && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+ {
+ HOST_WIDE_INT cst = TREE_INT_CST_LOW (arg);
+ return GEN_INT (cst);
+ }
+ }
+
+ return expand_normal (arg);
+}
+
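
A plain-C illustration of the zero- versus sign-extension issue the helper
above works around (values only, no GCC internals): the insn predicates
expect the zero-extended constant, while a sign-extending expansion of the
same 8-bit argument would yield -1.

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    uint8_t arg = 0xff;

    /* What the predicates want to see, like (const_int 255).  */
    int64_t zext = (uint64_t) arg;

    /* What a sign-extending QImode expansion would produce instead,
       like (const_int -1).  */
    int64_t sext = (int8_t) arg;

    printf ("%lld %lld\n", (long long) zext, (long long) sext);
    return 0;
  }
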
/* Subroutine of ix86_expand_builtin to take care of insns with
variable number of operands. */
@@ -12142,7 +12222,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
/* Need to fixup modeless constant before testing predicate. */
op = fixup_modeless_constant (op, mode);
@@ -12837,7 +12917,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
bool match = insn_p->operand[i + 1].predicate (op, mode);
@@ -13322,7 +13402,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
machine_mode mode = insn_p->operand[i + 1].mode;
arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
+ op = ix86_expand_unsigned_small_int_cst_argument (arg);
if (i == memory)
{
@@ -15466,7 +15546,7 @@ rdseed_step:
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
+ op3 = ix86_expand_unsigned_small_int_cst_argument (arg3);
op4 = expand_normal (arg4);
/* Note the arg order is different from the operand order. */
mode0 = insn_data[icode].operand[1].mode;
@@ -15681,7 +15761,7 @@ rdseed_step:
arg3 = CALL_EXPR_ARG (exp, 3);
arg4 = CALL_EXPR_ARG (exp, 4);
op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
+ op1 = ix86_expand_unsigned_small_int_cst_argument (arg1);
op2 = expand_normal (arg2);
op3 = expand_normal (arg3);
op4 = expand_normal (arg4);
@@ -16130,7 +16210,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
{
case VEC_BCAST_PXOR:
if ((mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONST0_RTX (mode));
return true;
@@ -16138,7 +16218,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
case VEC_BCAST_PCMPEQ:
if ((mode == V4SImode && !TARGET_SSE2)
|| (mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONSTM1_RTX (mode));
return true;
@@ -16158,7 +16238,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_absv32qi2 (tmp2, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16184,7 +16264,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16210,7 +16290,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16236,7 +16316,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16262,7 +16342,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16288,7 +16368,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16342,8 +16422,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
if (GET_MODE (reg) != innermode)
reg = gen_lowpart (innermode, reg);
SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
@@ -16659,7 +16738,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V32HFmode:
case E_V32BFmode:
- gcc_assert (TARGET_EVEX512);
if (TARGET_AVX512BW)
return ix86_vector_duplicate_value (mode, target, val);
else
@@ -16712,9 +16790,6 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
- if (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
- return false;
-
switch (mode)
{
case E_V2DImode:
@@ -18670,6 +18745,33 @@ emit_reduc_half (rtx dest, rtx src, int i)
case E_V8HFmode:
case E_V4SImode:
case E_V2DImode:
+ if (TARGET_SSE_REDUCTION_PREFER_PSHUF)
+ {
+ if (i == 128)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3));
+ break;
+ }
+ else if (i == 64)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ else if (i == 32)
+ {
+ d = gen_reg_rtx (V8HImode);
+ tem = gen_sse2_pshuflw_1 (
+ d, force_reg (V8HImode, gen_lowpart (V8HImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ }
d = gen_reg_rtx (V1TImode);
tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
GEN_INT (i / 2));
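
The pshufd-based halving above corresponds to the following SSE2 intrinsics
sketch (illustrative, not part of the patch): the upper half is brought down
with a shuffle instead of a psrldq byte shift, then added.

  #include <emmintrin.h>
  #include <stdio.h>

  /* Horizontal add of four 32-bit lanes, reducing by halves.  */
  static int
  hsum_epi32 (__m128i v)
  {
    /* i == 128 step: select lanes {2,3,2,3}.  */
    __m128i hi64 = _mm_shuffle_epi32 (v, _MM_SHUFFLE (3, 2, 3, 2));
    __m128i sum64 = _mm_add_epi32 (v, hi64);
    /* i == 64 step: broadcast lane 1.  */
    __m128i hi32 = _mm_shuffle_epi32 (sum64, _MM_SHUFFLE (1, 1, 1, 1));
    __m128i sum32 = _mm_add_epi32 (sum64, hi32);
    return _mm_cvtsi128_si32 (sum32);
  }

  int
  main (void)
  {
    __m128i v = _mm_setr_epi32 (1, 2, 3, 4);
    printf ("%d\n", hsum_epi32 (v));	/* prints 10 */
    return 0;
  }
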
@@ -19256,8 +19358,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19370,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
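
Both sequences above perform the same single Newton-Raphson refinement,
a * x0 * (2 - b * x0) with x0 = rcp(b); the FMA form only reassociates it so
that the two correction steps become fused operations.  A small
self-contained check (plain C, illustrative values, not GCC code):

  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    float a = 3.0f, b = 7.0f;
    float x0 = 1.0f / b + 1e-3f;	/* imitate an inexact rcpps estimate */

    /* Non-FMA sequence: e0 = x0*b; e1 = x0+x0; e0 = x0*e0; res = a*(e1-e0).  */
    float res_classic = a * ((x0 + x0) - x0 * (x0 * b));

    /* FMA sequence: e0 = x0*a; e1 = e0*b - a; res = -e1*x0 + e0.  */
    float e0 = x0 * a;
    float e1 = fmaf (e0, b, -a);
    float res_fma = fmaf (-e1, x0, e0);

    /* Both agree with a/b to roughly twice the accuracy of x0.  */
    printf ("%.9g %.9g %.9g\n", res_classic, res_fma, a / b);
    return 0;
  }
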
/* Output code to perform a Newton-Rhapson approximation of a
@@ -19356,7 +19478,7 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
unsigned vector_size = GET_MODE_SIZE (mode);
if (TARGET_FMA
- || (TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ || (TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
emit_insn (gen_rtx_SET (e2,
gen_rtx_FMA (mode, e0, x0, mthree)));
@@ -22018,8 +22140,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22355,8 +22476,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22464,8 +22584,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22475,8 +22594,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22590,8 +22708,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22601,8 +22718,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22796,8 +22912,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dfirst);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22805,8 +22920,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dsecond);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -24290,9 +24404,6 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
unsigned int i, nelt, which;
bool two_args;
- if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
- return false;
-
/* For HF and BF mode vector, convert it to HI using subreg. */
if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
{
@@ -24834,7 +24945,6 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
ix86_expand_vecop_qihi. */
if (!TARGET_AVX512BW
|| (qimode == V16QImode && !TARGET_AVX512VL)
- || (qimode == V32QImode && !TARGET_EVEX512)
/* There are no V64HImode instructions. */
|| qimode == V64QImode)
return false;
@@ -25303,7 +25413,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_AVX512DQ && TARGET_EVEX512 && mode == V8DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
@@ -26033,8 +26143,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
}
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
start_sequence ();
@@ -26045,8 +26154,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return res;
}
@@ -26089,8 +26197,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
target = gen_rtx_REG (cc_mode, FLAGS_REG);
dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
@@ -26121,8 +26228,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
}
@@ -26136,8 +26242,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x)
{
if (!TARGET_AVX512F
|| !CONST_VECTOR_P (x)
- || (!TARGET_AVX512VL
- && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512))
+ || (!TARGET_AVX512VL && GET_MODE_SIZE (mode) != 64)
|| !VALID_BCST_MODE_P (GET_MODE_INNER (mode))
/* Disallow HFmode broadcast. */
|| GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c35ac24..054f8d5 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
insns_conv = BITMAP_ALLOC (NULL);
queue = NULL;
- n_sse_to_integer = 0;
- n_integer_to_sse = 0;
-
+ cost_sse_integer = 0;
+  weighted_cost_sse_integer = 0;
max_visits = x86_stv_max_visits;
}
@@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def)
/* Record the def/insn pair so we can later efficiently iterate over
the defs to convert on insns not in the chain. */
bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
+ basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ int cost = 0;
+
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
{
if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
&& !reg_new)
return;
- n_integer_to_sse++;
+
+ /* Cost integer to sse moves. */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpinsrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (12);
}
else
{
if (!reg_new)
return;
- n_sse_to_integer++;
+
+ /* Cost sse to integer moves. */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpextrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (13);
}
+ if (speed_p)
+ weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
+
+ cost_sse_integer += cost;
+
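
The accounting just added can be summarised by a small model (hypothetical
types and names, not GCC code): every cross-unit move contributes its cost
to an unscaled total and, when its block is optimized for speed, also to a
total scaled by the block's execution frequency relative to function entry.

  struct chain_costs
  {
    double weighted_cost_sse_integer;	/* frequency-scaled, speed blocks only */
    int cost_sse_integer;		/* plain sum over all blocks */
  };

  static void
  account_cross_unit_move (struct chain_costs *c, int cost,
			   double bb_freq, int speed_p)
  {
    if (speed_p)
      c->weighted_cost_sse_integer += bb_freq * cost;
    c->cost_sse_integer += cost;
  }
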
if (dump_file)
fprintf (dump_file,
" Mark r%d def in insn %d as requiring both modes in chain #%d\n",
@@ -518,26 +549,28 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
instead of using a scalar one. */
int
-general_scalar_chain::vector_const_cost (rtx exp)
+general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
{
gcc_assert (CONST_INT_P (exp));
if (standard_sse_constant_p (exp, vmode))
return ix86_cost->sse_op;
+ if (optimize_bb_for_size_p (bb))
+ return COSTS_N_BYTES (8);
/* We have separate costs for SImode and DImode, use SImode costs
for smaller modes. */
- return ix86_cost->sse_load[smode == DImode ? 1 : 0];
+ return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
}
-/* Compute a gain for chain conversion. */
+/* Return true if it's cost profitable for chain conversion. */
-int
+bool
general_scalar_chain::compute_convert_gain ()
{
bitmap_iterator bi;
unsigned insn_uid;
int gain = 0;
- int cost = 0;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -547,7 +580,7 @@ general_scalar_chain::compute_convert_gain ()
smaller modes than SImode the int load/store costs need to be
adjusted as well. */
unsigned sse_cost_idx = smode == DImode ? 1 : 0;
- unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
+ int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
{
@@ -555,26 +588,58 @@ general_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ basic_block bb = BLOCK_FOR_INSN (insn);
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
if (REG_P (src) && REG_P (dst))
- igain += 2 * m - ix86_cost->xmm_move;
+ {
+ if (!speed_p)
+ /* reg-reg move is 2 bytes, while SSE 3. */
+ igain += COSTS_N_BYTES (2 * m - 3);
+ else
+ /* Move costs are normalized to reg-reg move having cost 2. */
+ igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
+ }
else if (REG_P (src) && MEM_P (dst))
- igain
- += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
+ {
+ if (!speed_p)
+ /* Integer load/store is 3+ bytes and SSE 4+. */
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain
+ += COSTS_N_INSNS (m * ix86_cost->int_store[2]
+ - ix86_cost->sse_store[sse_cost_idx]) / 2;
+ }
else if (MEM_P (src) && REG_P (dst))
- igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
+ {
+ if (!speed_p)
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
+ }
else
{
/* For operations on memory operands, include the overhead
of explicit load and store instructions. */
if (MEM_P (dst))
- igain += optimize_insn_for_size_p ()
- ? -COSTS_N_BYTES (8)
- : (m * (ix86_cost->int_load[2]
- + ix86_cost->int_store[2])
- - (ix86_cost->sse_load[sse_cost_idx] +
- ix86_cost->sse_store[sse_cost_idx]));
+ {
+ if (!speed_p)
+		  /* ??? This probably should account for the size difference
+		     of SSE and integer loads rather than the full SSE load. */
+ igain -= COSTS_N_BYTES (8);
+ else
+ {
+ int cost = (m * (ix86_cost->int_load[2]
+ + ix86_cost->int_store[2])
+ - (ix86_cost->sse_load[sse_cost_idx] +
+ ix86_cost->sse_store[sse_cost_idx]));
+ igain += COSTS_N_INSNS (cost) / 2;
+ }
+ }
switch (GET_CODE (src))
{
@@ -595,7 +660,7 @@ general_scalar_chain::compute_convert_gain ()
igain += ix86_cost->shift_const - ix86_cost->sse_op;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
break;
case ROTATE:
@@ -631,16 +696,17 @@ general_scalar_chain::compute_convert_gain ()
igain += m * ix86_cost->add;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
if (CONST_INT_P (XEXP (src, 1)))
- igain -= vector_const_cost (XEXP (src, 1));
+ igain -= vector_const_cost (XEXP (src, 1), bb);
if (MEM_P (XEXP (src, 1)))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
else
- igain += m * ix86_cost->int_load[2]
- - ix86_cost->sse_load[sse_cost_idx];
+ igain += COSTS_N_INSNS
+ (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
}
break;
@@ -698,7 +764,7 @@ general_scalar_chain::compute_convert_gain ()
case CONST_INT:
if (REG_P (dst))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
/* xor (2 bytes) vs. xorps (3 bytes). */
if (src == const0_rtx)
@@ -722,14 +788,14 @@ general_scalar_chain::compute_convert_gain ()
/* DImode can be immediate for TARGET_64BIT
and SImode always. */
igain += m * COSTS_N_INSNS (1);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
}
else if (MEM_P (dst))
{
igain += (m * ix86_cost->int_store[2]
- ix86_cost->sse_store[sse_cost_idx]);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
break;
@@ -737,13 +803,14 @@ general_scalar_chain::compute_convert_gain ()
if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
{
// movd (4 bytes) replaced with movdqa (4 bytes).
- if (!optimize_insn_for_size_p ())
- igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+	    if (speed_p)
+ igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
+ - ix86_cost->xmm_move) / 2;
}
else
{
// pshufd; movd replaced with pshufd.
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain += COSTS_N_BYTES (4);
else
igain += ix86_cost->sse_to_integer;
@@ -755,55 +822,34 @@ general_scalar_chain::compute_convert_gain ()
}
}
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+ gain += igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+	  fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
-
- /* Cost the integer to sse and sse to integer moves. */
- if (!optimize_function_for_size_p (cfun))
- {
- cost += n_sse_to_integer * ix86_cost->sse_to_integer;
- /* ??? integer_to_sse but we only have that in the RA cost table.
- Assume sse_to_integer/integer_to_sse are the same which they
- are at the moment. */
- cost += n_integer_to_sse * ix86_cost->sse_to_integer;
- }
- else if (TARGET_64BIT || smode == SImode)
- {
- cost += n_sse_to_integer * COSTS_N_BYTES (4);
- cost += n_integer_to_sse * COSTS_N_BYTES (4);
- }
- else if (TARGET_SSE4_1)
- {
- /* vmovd (4 bytes) + vpextrd (6 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (10);
- /* vmovd (4 bytes) + vpinsrd (6 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (10);
- }
- else
{
- /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (13);
- /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (12);
+      fprintf (dump_file, "  Instruction conversion gain: %d\n",
+ gain);
+ fprintf (dump_file, " Registers conversion cost: %d\n",
+ cost_sse_integer);
+      fprintf (dump_file, "  Weighted instruction conversion gain: %.2f\n",
+ weighted_gain.to_double ());
+ fprintf (dump_file, " Weighted registers conversion cost: %.2f\n",
+ weighted_cost_sse_integer.to_double ());
}
- if (dump_file)
- fprintf (dump_file, " Registers conversion cost: %d\n", cost);
-
- gain -= cost;
-
- if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
-
- return gain;
+ if (weighted_gain != weighted_cost_sse_integer)
+ return weighted_gain > weighted_cost_sse_integer;
+ else
+    return gain > cost_sse_integer;
}
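
The resulting profitability test compares the frequency-weighted numbers
first and falls back to the unweighted totals on a tie, roughly
(illustrative restatement, not GCC code):

  static bool
  chain_profitable_p (double weighted_gain, double weighted_cost,
		      int gain, int cost)
  {
    if (weighted_gain != weighted_cost)
      return weighted_gain > weighted_cost;
    return gain > cost;
  }
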
/* Insert generated conversion instruction sequence INSNS
@@ -902,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
else
emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
gen_gpr_to_xmm_move_src (vmode, reg)));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -970,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
else
emit_move_insn (dst, src);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -1066,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
start_sequence ();
vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
}
@@ -1508,33 +1551,34 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
with numerous special cases. */
static int
-timode_immed_const_gain (rtx cst)
+timode_immed_const_gain (rtx cst, basic_block bb)
{
/* movabsq vs. movabsq+vmovq+vunpacklqdq. */
if (CONST_WIDE_INT_P (cst)
&& CONST_WIDE_INT_NUNITS (cst) == 2
&& CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
- return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+ return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
: -COSTS_N_INSNS (2);
/* 2x movabsq ~ vmovdqa. */
return 0;
}
-/* Compute a gain for chain conversion. */
+/* Return true if chain conversion is cost profitable. */
-int
+bool
timode_scalar_chain::compute_convert_gain ()
{
/* Assume that if we have to move TImode values between units,
then transforming this chain isn't worth it. */
- if (n_sse_to_integer || n_integer_to_sse)
- return -1;
+ if (cost_sse_integer)
+ return false;
bitmap_iterator bi;
unsigned insn_uid;
/* Split ties to prefer V1TImode when not optimizing for size. */
int gain = optimize_size ? 0 : 1;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -1546,34 +1590,36 @@ timode_scalar_chain::compute_convert_gain ()
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
HOST_WIDE_INT op1val;
+ basic_block bb = BLOCK_FOR_INSN (insn);
int scost, vcost;
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
switch (GET_CODE (src))
{
case REG:
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
else
igain = COSTS_N_INSNS (1);
break;
case MEM:
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
break;
case CONST_INT:
if (MEM_P (dst)
&& standard_sse_constant_p (src, V1TImode))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ igain = !speed_p ? COSTS_N_BYTES (11) : 1;
break;
case CONST_WIDE_INT:
/* 2 x mov vs. vmovdqa. */
if (MEM_P (dst))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
break;
case NOT:
@@ -1587,14 +1633,14 @@ timode_scalar_chain::compute_convert_gain ()
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
if (CONST_SCALAR_INT_P (XEXP (src, 1)))
- igain += timode_immed_const_gain (XEXP (src, 1));
+ igain += timode_immed_const_gain (XEXP (src, 1), bb);
break;
case ASHIFT:
case LSHIFTRT:
/* See ix86_expand_v1ti_shift. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 65)
scost = COSTS_N_BYTES (5);
@@ -1628,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFTRT:
/* See ix86_expand_v1ti_ashiftrt. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 127)
scost = COSTS_N_BYTES (7);
@@ -1706,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain ()
case ROTATERT:
/* See ix86_expand_v1ti_rotate. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
scost = COSTS_N_BYTES (13);
if ((op1val & 31) == 0)
@@ -1738,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain ()
{
if (GET_CODE (XEXP (src, 0)) == AND)
/* and;and;or (9 bytes) vs. ptest (5 bytes). */
- igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (2);
+ igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
/* or (3 bytes) vs. ptest (5 bytes). */
- else if (optimize_insn_for_size_p ())
+ else if (!speed_p)
igain = -COSTS_N_BYTES (2);
}
else if (XEXP (src, 1) == const1_rtx)
/* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
- igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6)
- : -COSTS_N_INSNS (1);
+ igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
break;
default:
break;
}
+ gain += igain;
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+ fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
+ fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n",
+ gain, weighted_gain.to_double ());
- return gain;
+ if (weighted_gain > (sreal) 0)
+ return true;
+ else
+ return gain > 0;
}
/* Fix uses of converted REG in debug insns. */
@@ -1874,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
src = validize_mem (force_const_mem (V1TImode, src));
use_move = MEM_P (dst);
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
if (use_move)
@@ -2561,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p)
conversions. */
if (chain->build (&candidates[i], uid, disallowed))
{
- if (chain->compute_convert_gain () > 0)
+ if (chain->compute_convert_gain ())
converted_insns += chain->convert ();
else if (dump_file)
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
@@ -3034,6 +3085,82 @@ ix86_rpad_gate ()
&& optimize_function_for_speed_p (cfun));
}
+/* Generate a vector set, DEST = SRC, at entry of the nearest common
+ dominator of the basic blocks in bitmap BBS, hoisted out of any real
+ loop into the fake loop that contains the whole function, so that
+ there is only a single vector set in the whole function. If not
+ nullptr, INNER_SCALAR is the inner scalar of SRC, as (reg:SI 99) in
+ (vec_duplicate:V4SI (reg:SI 99)). */
+
+static void
+ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
+ rtx inner_scalar = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx set = gen_rtx_SET (dest, src);
+
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ rtx_insn *set_insn;
+ if (insn == BB_HEAD (bb))
+ {
+ set_insn = emit_insn_before (set, insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else
+ {
+ rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
+ set_insn = emit_insn_after (set, after);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (inner_scalar)
+ {
+ /* Set the source in (vec_duplicate:V4SI (reg:SI 99)). */
+ rtx reg = XEXP (src, 0);
+ if ((REG_P (inner_scalar) || MEM_P (inner_scalar))
+ && GET_MODE (reg) != GET_MODE (inner_scalar))
+ inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0);
+ rtx set = gen_rtx_SET (reg, inner_scalar);
+ insn = emit_insn_before (set, set_insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
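The placement loop above climbs immediate dominators until the chosen block's enclosing loop is the fake whole-function loop, i.e. until the insertion point sits outside every real loop. A toy model of that walk, using a hypothetical parent-pointer block structure instead of GCC's CFG and loop APIs, is sketched below:

#include <cstdio>

/* Toy stand-in for a basic block: IDOM is the immediate dominator,
   LOOP_HEADER is the header of the enclosing real loop (nullptr when
   the block is only inside the fake whole-function loop).  */
struct toy_block
{
  toy_block *idom;
  toy_block *loop_header;
};

/* Hoist the insertion point out of every real loop, mirroring the
   dominator walk in ix86_place_single_vector_set.  */
static toy_block *
hoist_out_of_loops (toy_block *bb)
{
  while (bb->loop_header)
    bb = bb->loop_header->idom;
  return bb;
}

int
main ()
{
  toy_block entry = { nullptr, nullptr };
  toy_block header = { &entry, nullptr };  /* loop header */
  toy_block body = { &header, &header };   /* block inside the loop */
  printf ("%s\n",
          hoist_out_of_loops (&body) == &entry ? "placed at entry" : "kept");
  return 0;
}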
+
/* At entry of the nearest common dominator for basic blocks with
conversions/rcp/sqrt/rsqrt/round, generate a single
vxorps %xmmN, %xmmN, %xmmN
@@ -3155,7 +3282,6 @@ remove_partial_avx_dependency (void)
/* Generate an XMM vector SET. */
set = gen_rtx_SET (vec, src);
set_insn = emit_insn_before (set, insn);
- df_insn_rescan (set_insn);
if (cfun->can_throw_non_call_exceptions)
{
@@ -3188,35 +3314,10 @@ remove_partial_avx_dependency (void)
calculate_dominance_info (CDI_DOMINATORS);
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
- /* Generate a vxorps at entry of the nearest dominator for basic
- blocks with conversions, which is in the fake loop that
- contains the whole function, so that there is only a single
- vxorps in the whole function. */
- bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
- convert_bbs);
- while (bb->loop_father->latch
- != EXIT_BLOCK_PTR_FOR_FN (cfun))
- bb = get_immediate_dominator (CDI_DOMINATORS,
- bb->loop_father->header);
-
- set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
-
- insn = BB_HEAD (bb);
- while (insn && !NONDEBUG_INSN_P (insn))
- {
- if (insn == BB_END (bb))
- {
- insn = NULL;
- break;
- }
- insn = NEXT_INSN (insn);
- }
- if (insn == BB_HEAD (bb))
- set_insn = emit_insn_before (set, insn);
- else
- set_insn = emit_insn_after (set,
- insn ? PREV_INSN (insn) : BB_END (bb));
- df_insn_rescan (set_insn);
+ ix86_place_single_vector_set (v4sf_const0,
+ CONST0_RTX (V4SFmode),
+ convert_bbs);
+
loop_optimizer_finalize ();
if (!control_flow_insns.is_empty ())
@@ -3288,6 +3389,568 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
return new pass_remove_partial_avx_dependency (ctxt);
}
+/* Return a machine mode suitable for vector SIZE with SMODE inner
+ mode. */
+
+static machine_mode
+ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
+{
+ /* Use the inner scalar mode of vector broadcast source in:
+
+ (set (reg:V8DF 394)
+ (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
+
+ to compute the vector mode for broadcast from vector source. */
+ if (VECTOR_MODE_P (smode))
+ smode = GET_MODE_INNER (smode);
+ scalar_mode s_mode = as_a <scalar_mode> (smode);
+ poly_uint64 nunits = size / GET_MODE_SIZE (smode);
+ machine_mode mode = mode_for_vector (s_mode, nunits).require ();
+ return mode;
+}
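As a quick check of the mode computation above (a simplified stand-in, not the GCC API): the number of elements is simply the widest register size divided by the scalar size, so a 64-byte register with a DFmode (8-byte) scalar yields V8DF.

#include <cstdio>

/* Simplified model: the CSE register holds SIZE bytes and the broadcast
   element is SCALAR_BYTES wide, so the vector mode simply has
   SIZE / SCALAR_BYTES elements of that scalar.  */
static unsigned
vector_cse_nunits (unsigned size, unsigned scalar_bytes)
{
  return size / scalar_bytes;
}

int
main ()
{
  printf ("64-byte register, DF scalar -> V%uDF\n", vector_cse_nunits (64, 8));
  printf ("64-byte register, SI scalar -> V%uSI\n", vector_cse_nunits (64, 4));
  printf ("32-byte register, HI scalar -> V%uHI\n", vector_cse_nunits (32, 2));
  return 0;
}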
+
+/* Replace the source operand of instructions in VECTOR_INSNS with
+ VECTOR_CONST in VECTOR_MODE. */
+
+static void
+replace_vector_const (machine_mode vector_mode, rtx vector_const,
+ auto_bitmap &vector_insns,
+ machine_mode scalar_mode)
+{
+ bitmap_iterator bi;
+ unsigned int id;
+
+ EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* Get the single SET instruction. */
+ rtx set = single_set (insn);
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ machine_mode mode = GET_MODE (dest);
+
+ rtx replace;
+ /* Replace the source operand with VECTOR_CONST. */
+ if (SUBREG_P (src) || mode == vector_mode)
+ replace = vector_const;
+ else
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+ {
+ /* If the mode size is smaller than its natural size,
+ first insert an extra move with a QI vector SUBREG
+ of the same size to avoid validate_subreg failure. */
+ machine_mode vmode
+ = ix86_get_vector_cse_mode (size, scalar_mode);
+ rtx vreg;
+ if (mode == vmode)
+ vreg = vector_const;
+ else
+ {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ rtx_insn *vinsn = emit_insn_before (pat, insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nInsert an extra move:\n\n");
+ print_rtl_single (dump_file, vinsn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+ }
+ else
+ replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ }
+ SET_SRC (set) = replace;
+ /* Drop possible dead definitions. */
+ PATTERN (insn) = set;
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ df_insn_rescan (insn);
+ }
+}
+
+enum x86_cse_kind
+{
+ X86_CSE_CONST0_VECTOR,
+ X86_CSE_CONSTM1_VECTOR,
+ X86_CSE_VEC_DUP
+};
+
+struct redundant_load
+{
+ /* Bitmap of basic blocks with broadcast instructions. */
+ auto_bitmap bbs;
+ /* Bitmap of broadcast instructions. */
+ auto_bitmap insns;
+ /* The broadcast inner scalar. */
+ rtx val;
+ /* The inner scalar mode. */
+ machine_mode mode;
+ /* The instruction which sets the inner scalar. Nullptr if the
+ inner scalar can be used anywhere in the function instead of
+ only within the defining basic block. */
+ rtx_insn *def_insn;
+ /* The widest broadcast source. */
+ rtx broadcast_source;
+ /* The widest broadcast register. */
+ rtx broadcast_reg;
+ /* The basic block of the broadcast instruction. */
+ basic_block bb;
+ /* The number of broadcast instructions with the same inner scalar. */
+ unsigned HOST_WIDE_INT count;
+ /* The threshold of broadcast instructions with the same inner
+ scalar. */
+ unsigned int threshold;
+ /* The widest broadcast size in bytes. */
+ unsigned int size;
+ /* Load kind. */
+ x86_cse_kind kind;
+};
+
+/* Return the inner scalar if OP is a broadcast, else return nullptr. */
+
+static rtx
+ix86_broadcast_inner (rtx op, machine_mode mode,
+ machine_mode *scalar_mode_p,
+ x86_cse_kind *kind_p, rtx_insn **insn_p)
+{
+ if (op == const0_rtx || op == CONST0_RTX (mode))
+ {
+ *scalar_mode_p = QImode;
+ *kind_p = X86_CSE_CONST0_VECTOR;
+ *insn_p = nullptr;
+ return const0_rtx;
+ }
+ else if ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && (op == constm1_rtx || op == CONSTM1_RTX (mode)))
+ || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ && float_vector_all_ones_operand (op, mode)))
+ {
+ *scalar_mode_p = QImode;
+ *kind_p = X86_CSE_CONSTM1_VECTOR;
+ *insn_p = nullptr;
+ return constm1_rtx;
+ }
+
+ mode = GET_MODE (op);
+ int nunits = GET_MODE_NUNITS (mode);
+ if (nunits < 2)
+ return nullptr;
+
+ *kind_p = X86_CSE_VEC_DUP;
+
+ rtx reg;
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+ /* Only
+ (vec_duplicate:V4SI (reg:SI 99))
+ (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
+ are supported. Set OP to the broadcast source by default. */
+ op = XEXP (op, 0);
+ reg = op;
+ if (SUBREG_P (op)
+ && SUBREG_BYTE (op) == 0
+ && !paradoxical_subreg_p (op))
+ reg = SUBREG_REG (op);
+ if (!REG_P (reg))
+ {
+ if (MEM_P (op)
+ && SYMBOL_REF_P (XEXP (op, 0))
+ && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
+ {
+ /* Handle constant broadcast from memory. */
+ *scalar_mode_p = GET_MODE_INNER (mode);
+ *insn_p = nullptr;
+ return op;
+ }
+ return nullptr;
+ }
+ }
+ else if (CONST_VECTOR_P (op))
+ {
+ rtx first = XVECEXP (op, 0, 0);
+ for (int i = 1; i < nunits; ++i)
+ {
+ rtx tmp = XVECEXP (op, 0, i);
+ /* Vector duplicate value. */
+ if (!rtx_equal_p (tmp, first))
+ return nullptr;
+ }
+ *scalar_mode_p = GET_MODE (first);
+ *insn_p = nullptr;
+ return first;
+ }
+ else
+ return nullptr;
+
+ mode = GET_MODE (op);
+
+ /* Only single def chain is supported. */
+ df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
+ if (!ref
+ || DF_REF_IS_ARTIFICIAL (ref)
+ || DF_REF_NEXT_REG (ref) != nullptr)
+ return nullptr;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ rtx set = single_set (insn);
+ if (!set)
+ return nullptr;
+
+ rtx src = SET_SRC (set);
+
+ if (CONST_INT_P (src))
+ {
+ /* Handle sequences like
+
+ (set (reg:SI 99)
+ (const_int 34 [0x22]))
+ (set (reg:V4SI 98)
+ (vec_duplicate:V4SI (reg:SI 99)))
+
+ Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
+ integer constant. */
+ op = src;
+ *insn_p = nullptr;
+ }
+ else
+ {
+ /* Handle sequences like
+
+ (set (reg:QI 105 [ c ])
+ (reg:QI 5 di [ c ]))
+ (set (reg:V64QI 102 [ _1 ])
+ (vec_duplicate:V64QI (reg:QI 105 [ c ])))
+
+ (set (reg/v:SI 116 [ argc ])
+ (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
+ (set (reg:V4SI 119 [ _45 ])
+ (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
+
+ (set (reg:SI 98 [ _1 ])
+ (sign_extend:SI (reg:QI 106 [ c ])))
+ (set (reg:V16SI 103 [ _2 ])
+ (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
+
+ (set (reg:SI 102 [ cost ])
+ (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
+ (set (reg:V4HI 103 [ _16 ])
+ (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
+
+ (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
+ (ashift:SI (reg:SI 158)
+ (subreg:QI (reg:SI 156 [ _2 ]) 0)))
+ (set (reg:V16HI 183 [ _61 ])
+ (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
+
+ Set *INSN_P to INSN and return the broadcast source otherwise. */
+ *insn_p = insn;
+ }
+
+ *scalar_mode_p = mode;
+ return op;
+}
+
+/* At entry of the nearest common dominator for basic blocks with vector
+ CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
+ vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
+ uses.
+
+ NB: We want to generate only a single widest vector set to cover the
+ whole function. The LCM algorithm isn't appropriate here since it
+ may place a vector set inside the loop. */
+
+static unsigned int
+remove_redundant_vector_load (void)
+{
+ timevar_push (TV_MACH_DEP);
+
+ auto_vec<redundant_load *> loads;
+ redundant_load *load;
+ basic_block bb;
+ rtx_insn *insn;
+ unsigned int i;
+
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ bool recursive_call_p = cfun->machine->recursive_function;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+
+ /* Record single set vector instruction with CONST0_RTX and
+ CONSTM1_RTX source. Record basic blocks with CONST0_RTX and
+ CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the
+ maximum size of CONST0_RTX and CONSTM1_RTX. */
+
+ rtx dest = SET_DEST (set);
+ machine_mode mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ continue;
+
+ rtx src = SET_SRC (set);
+ /* Skip instructions that do not load the vector into a register. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ continue;
+
+ rtx_insn *def_insn;
+ machine_mode scalar_mode;
+ x86_cse_kind kind;
+ rtx val = ix86_broadcast_inner (src, mode, &scalar_mode,
+ &kind, &def_insn);
+ if (!val)
+ continue;
+
+ /* Remove redundant register loads if the same value is loaded
+ at least 2 times. */
+ unsigned int threshold = 2;
+
+ /* Check if there is a matching redundant vector load. */
+ bool matched = false;
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->val
+ && load->kind == kind
+ && load->mode == scalar_mode
+ && (load->bb == bb
+ || kind < X86_CSE_VEC_DUP
+ /* A broadcast that is not an all-0s/-1s constant must stay
+ in the same basic block if the function is recursive. */
+ || !recursive_call_p)
+ && rtx_equal_p (load->val, val))
+ {
+ /* Record vector instruction. */
+ bitmap_set_bit (load->insns, INSN_UID (insn));
+
+ /* Record the maximum vector size. */
+ if (load->size < GET_MODE_SIZE (mode))
+ load->size = GET_MODE_SIZE (mode);
+
+ /* Record the basic block. */
+ bitmap_set_bit (load->bbs, bb->index);
+ load->count++;
+ matched = true;
+ break;
+ }
+
+ if (matched)
+ continue;
+
+ /* We see this vector broadcast the first time. */
+ load = new redundant_load;
+
+ load->val = copy_rtx (val);
+ load->mode = scalar_mode;
+ load->size = GET_MODE_SIZE (mode);
+ load->def_insn = def_insn;
+ load->count = 1;
+ load->threshold = threshold;
+ load->bb = BLOCK_FOR_INSN (insn);
+ load->kind = kind;
+
+ bitmap_set_bit (load->insns, INSN_UID (insn));
+ bitmap_set_bit (load->bbs, bb->index);
+
+ loads.safe_push (load);
+ }
+ }
+
+ bool replaced = false;
+ rtx reg, broadcast_source, broadcast_reg;
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->count >= load->threshold)
+ {
+ machine_mode mode = ix86_get_vector_cse_mode (load->size,
+ load->mode);
+ broadcast_reg = gen_reg_rtx (mode);
+ if (load->def_insn)
+ {
+ /* Replace redundant vector loads with a single vector load
+ in the same basic block. */
+ reg = load->val;
+ if (load->mode != GET_MODE (reg))
+ reg = gen_rtx_SUBREG (load->mode, reg, 0);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ replace_vector_const (mode, broadcast_reg, load->insns,
+ load->mode);
+ }
+ else
+ {
+ /* This is a constant integer/double vector. If the
+ inner scalar is 0 or -1, set vector to CONST0_RTX
+ or CONSTM1_RTX directly. */
+ rtx reg;
+ switch (load->kind)
+ {
+ case X86_CSE_CONST0_VECTOR:
+ broadcast_source = CONST0_RTX (mode);
+ break;
+ case X86_CSE_CONSTM1_VECTOR:
+ broadcast_source = CONSTM1_RTX (mode);
+ break;
+ default:
+ reg = gen_reg_rtx (load->mode);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ break;
+ }
+ replace_vector_const (mode, broadcast_reg, load->insns,
+ load->mode);
+ }
+ load->broadcast_source = broadcast_source;
+ load->broadcast_reg = broadcast_reg;
+ replaced = true;
+ }
+
+ if (replaced)
+ {
+ auto_vec<rtx_insn *> control_flow_insns;
+
+ /* (Re-)discover loops so that bb->loop_father can be used in the
+ analysis below. */
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->count >= load->threshold)
+ {
+ if (load->def_insn)
+ {
+ /* Insert a broadcast after the original scalar
+ definition. */
+ rtx set = gen_rtx_SET (load->broadcast_reg,
+ load->broadcast_source);
+ insn = emit_insn_after (set, load->def_insn);
+
+ if (cfun->can_throw_non_call_exceptions)
+ {
+ /* Handle REG_EH_REGION note in DEF_INSN. */
+ rtx note = find_reg_note (load->def_insn,
+ REG_EH_REGION, nullptr);
+ if (note)
+ {
+ control_flow_insns.safe_push (load->def_insn);
+ add_reg_note (insn, REG_EH_REGION,
+ XEXP (note, 0));
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, load->def_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else
+ ix86_place_single_vector_set (load->broadcast_reg,
+ load->broadcast_source,
+ load->bbs,
+ (load->kind == X86_CSE_VEC_DUP
+ ? load->val
+ : nullptr));
+ }
+
+ loop_optimizer_finalize ();
+
+ if (!control_flow_insns.is_empty ())
+ {
+ free_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
+ if (control_flow_insn_p (insn))
+ {
+ /* Split the block after insn. There will be a fallthru
+ edge, which is OK so we keep it. We have to create
+ the exception edges ourselves. */
+ bb = BLOCK_FOR_INSN (insn);
+ split_block (bb, insn);
+ rtl_make_eh_edge (NULL, bb, BB_END (bb));
+ }
+ }
+
+ df_process_deferred_rescans ();
+ }
+
+ df_clear_flags (DF_DEFER_INSN_RESCAN);
+
+ timevar_pop (TV_MACH_DEP);
+ return 0;
+}
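For illustration, source of the following shape is the kind of input this pass targets: two broadcasts of the same scalar at different widths should, on a compiler containing this pass (built with e.g. -mavx2 -O2), collapse into one widest broadcast whose low part feeds the narrower use. This is a hypothetical example, not a testcase from the patch:

#include <immintrin.h>

/* Both statements broadcast the same scalar C; the expectation is a
   single 256-bit broadcast, with the 128-bit use taking its low half.  */
void
scale (float *a, float *b, float c)
{
  __m128 v4 = _mm_set1_ps (c);
  __m256 v8 = _mm256_set1_ps (c);
  _mm_storeu_ps (a, _mm_mul_ps (_mm_loadu_ps (a), v4));
  _mm256_storeu_ps (b, _mm256_mul_ps (_mm256_loadu_ps (b), v8));
}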
+
+namespace {
+
+const pass_data pass_data_remove_redundant_vector_load =
+{
+ RTL_PASS, /* type */
+ "rrvl", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_remove_redundant_vector_load : public rtl_opt_pass
+{
+public:
+ pass_remove_redundant_vector_load (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return remove_redundant_vector_load ();
+ }
+}; // class pass_remove_redundant_vector_load
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+{
+ return new pass_remove_redundant_vector_load (ctxt);
+}
+
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
instructions when there are no flag set between a flag
producer and user. */
@@ -3962,7 +4625,6 @@ ix86_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -3979,37 +4641,16 @@ ix86_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- if (is_function_default_version
- (default_version_info->this_node->decl))
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev != NULL)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
if (targetm.has_ifunc_p ())
{
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 24b0c4e..e3719b3 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -153,12 +153,13 @@ class scalar_chain
bitmap insns_conv;
hash_map<rtx, rtx> defs_map;
- unsigned n_sse_to_integer;
- unsigned n_integer_to_sse;
+ /* Cost of inserted conversions between integer and SSE. */
+ int cost_sse_integer;
+ sreal weighted_cost_sse_integer;
auto_vec<rtx_insn *> control_flow_insns;
bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
- virtual int compute_convert_gain () = 0;
+ virtual bool compute_convert_gain () = 0;
int convert ();
protected:
@@ -184,11 +185,11 @@ class general_scalar_chain : public scalar_chain
public:
general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
: scalar_chain (smode_, vmode_) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void convert_insn (rtx_insn *insn) final override;
- int vector_const_cost (rtx exp);
+ int vector_const_cost (rtx exp, basic_block bb);
rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn);
};
@@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain
{
public:
timode_scalar_chain () : scalar_chain (TImode, V1TImode) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void fix_debug_reg_uses (rtx reg);
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index 19d78d7..6fa601d 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -118,8 +118,6 @@ DEF_PTA(SHA512)
DEF_PTA(SM4)
DEF_PTA(APX_F)
DEF_PTA(USER_MSR)
-DEF_PTA(EVEX512)
-DEF_PTA(AVX10_1_256)
DEF_PTA(AVX10_1)
DEF_PTA(AVX10_2)
DEF_PTA(AMX_AVX512)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index a9fac01..09cb133 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -259,9 +259,7 @@ static struct ix86_target_opts isa2_opts[] =
{ "-msm3", OPTION_MASK_ISA2_SM3 },
{ "-msha512", OPTION_MASK_ISA2_SHA512 },
{ "-msm4", OPTION_MASK_ISA2_SM4 },
- { "-mevex512", OPTION_MASK_ISA2_EVEX512 },
{ "-musermsr", OPTION_MASK_ISA2_USER_MSR },
- { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 },
{ "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 },
{ "-mavx10.2", OPTION_MASK_ISA2_AVX10_2 },
{ "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 },
@@ -713,8 +711,6 @@ ix86_function_specific_save (struct cl_target_option *ptr,
ptr->x_ix86_apx_features = opts->x_ix86_apx_features;
ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
- ptr->x_ix86_no_avx512_explicit = opts->x_ix86_no_avx512_explicit;
- ptr->x_ix86_no_avx10_1_explicit = opts->x_ix86_no_avx10_1_explicit;
ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
@@ -764,63 +760,63 @@ static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
/* This table must be in sync with enum processor_type in i386.h. */
static const struct processor_costs *processor_cost_table[] =
{
- &generic_cost,
- &i386_cost,
- &i486_cost,
- &pentium_cost,
- &lakemont_cost,
- &pentiumpro_cost,
- &pentium4_cost,
- &nocona_cost,
- &core_cost,
- &core_cost,
- &core_cost,
- &core_cost,
- &atom_cost,
- &slm_cost,
- &slm_cost,
- &slm_cost,
- &tremont_cost,
- &alderlake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &skylake_cost,
- &skylake_cost,
- &icelake_cost,
- &icelake_cost,
- &icelake_cost,
- &skylake_cost,
- &icelake_cost,
- &skylake_cost,
- &icelake_cost,
- &alderlake_cost,
- &icelake_cost,
- &icelake_cost,
- &icelake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &icelake_cost,
- &intel_cost,
- &lujiazui_cost,
- &yongfeng_cost,
- &shijidadao_cost,
- &geode_cost,
- &k6_cost,
- &athlon_cost,
- &k8_cost,
- &amdfam10_cost,
- &bdver_cost,
- &bdver_cost,
- &bdver_cost,
- &bdver_cost,
- &btver1_cost,
- &btver2_cost,
- &znver1_cost,
- &znver2_cost,
- &znver3_cost,
- &znver4_cost,
- &znver5_cost
+ &generic_cost, /* PROCESSOR_GENERIC. */
+ &i386_cost, /* PROCESSOR_I386. */
+ &i486_cost, /* PROCESSOR_I486. */
+ &pentium_cost, /* PROCESSOR_PENTIUM. */
+ &lakemont_cost, /* PROCESSOR_LAKEMONT. */
+ &pentiumpro_cost, /* PROCESSOR_PENTIUMPRO. */
+ &pentium4_cost, /* PROCESSOR_PENTIUM4. */
+ &nocona_cost, /* PROCESSOR_NOCONA. */
+ &core_cost, /* PROCESSOR_CORE2. */
+ &core_cost, /* PROCESSOR_NEHALEM. */
+ &core_cost, /* PROCESSOR_SANDYBRIDGE. */
+ &core_cost, /* PROCESSOR_HASWELL. */
+ &atom_cost, /* PROCESSOR_BONNELL. */
+ &slm_cost, /* PROCESSOR_SILVERMONT. */
+ &slm_cost, /* PROCESSOR_GOLDMONT. */
+ &slm_cost, /* PROCESSOR_GOLDMONT_PLUS. */
+ &tremont_cost, /* PROCESSOR_TREMONT. */
+ &alderlake_cost, /* PROCESSOR_SIERRAFOREST. */
+ &alderlake_cost, /* PROCESSOR_GRANDRIDGE. */
+ &alderlake_cost, /* PROCESSOR_CLEARWATERFOREST. */
+ &skylake_cost, /* PROCESSOR_SKYLAKE. */
+ &skylake_cost, /* PROCESSOR_SKYLAKE_AVX512. */
+ &icelake_cost, /* PROCESSOR_CANNONLAKE. */
+ &icelake_cost, /* PROCESSOR_ICELAKE_CLIENT. */
+ &icelake_cost, /* PROCESSOR_ICELAKE_SERVER. */
+ &skylake_cost, /* PROCESSOR_CASCADELAKE. */
+ &icelake_cost, /* PROCESSOR_TIGERLAKE. */
+ &skylake_cost, /* PROCESSOR_COOPERLAKE. */
+ &icelake_cost, /* PROCESSOR_SAPPHIRERAPIDS. */
+ &alderlake_cost, /* PROCESSOR_ALDERLAKE. */
+ &icelake_cost, /* PROCESSOR_ROCKETLAKE. */
+ &icelake_cost, /* PROCESSOR_GRANITERAPIDS. */
+ &icelake_cost, /* PROCESSOR_GRANITERAPIDS_D. */
+ &alderlake_cost, /* PROCESSOR_ARROWLAKE. */
+ &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */
+ &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */
+ &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */
+ &alderlake_cost, /* PROCESSOR_INTEL. */
+ &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */
+ &yongfeng_cost, /* PROCESSOR_YONGFENG. */
+ &shijidadao_cost, /* PROCESSOR_SHIJIDADAO. */
+ &geode_cost, /* PROCESSOR_GEODE. */
+ &k6_cost, /* PROCESSOR_K6. */
+ &athlon_cost, /* PROCESSOR_ATHLON. */
+ &k8_cost, /* PROCESSOR_K8. */
+ &amdfam10_cost, /* PROCESSOR_AMDFAM10. */
+ &bdver_cost, /* PROCESSOR_BDVER1. */
+ &bdver_cost, /* PROCESSOR_BDVER2. */
+ &bdver_cost, /* PROCESSOR_BDVER3. */
+ &bdver_cost, /* PROCESSOR_BDVER4. */
+ &btver1_cost, /* PROCESSOR_BTVER1. */
+ &btver2_cost, /* PROCESSOR_BTVER2. */
+ &znver1_cost, /* PROCESSOR_ZNVER1. */
+ &znver2_cost, /* PROCESSOR_ZNVER2. */
+ &znver3_cost, /* PROCESSOR_ZNVER3. */
+ &znver4_cost, /* PROCESSOR_ZNVER4. */
+ &znver5_cost /* PROCESSOR_ZNVER5. */
};
/* Guarantee that the array is aligned with enum processor_type. */
@@ -858,8 +854,6 @@ ix86_function_specific_restore (struct gcc_options *opts,
opts->x_ix86_apx_features = ptr->x_ix86_apx_features;
opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
- opts->x_ix86_no_avx512_explicit = ptr->x_ix86_no_avx512_explicit;
- opts->x_ix86_no_avx10_1_explicit = ptr->x_ix86_no_avx10_1_explicit;
opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
@@ -1131,11 +1125,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("sha512", OPT_msha512),
IX86_ATTR_ISA ("sm4", OPT_msm4),
IX86_ATTR_ISA ("apxf", OPT_mapxf),
- IX86_ATTR_ISA ("evex512", OPT_mevex512),
IX86_ATTR_ISA ("usermsr", OPT_musermsr),
- IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256),
IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1),
- IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1),
IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2),
IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512),
IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32),
@@ -1271,13 +1262,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
}
}
- /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */
- if (opt == OPT_msse4 && !opt_set_p)
- {
- opt = OPT_mno_sse4;
- opt_set_p = true;
- }
-
/* Process the option. */
if (opt == N_OPTS)
{
@@ -1436,18 +1420,6 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
target_clone_attr))
return error_mark_node;
- /* AVX10.1-256 will enable only 256 bit AVX512F features by setting all
- AVX512 related ISA flags and not setting EVEX512. When it is used
- with avx512 related function attribute, we need to enable 512 bit to
- align with the command line behavior. Manually set EVEX512 for this
- scenario. */
- if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
/* If the changed options are different from the default, rerun
ix86_option_override_internal, and then save the options away.
The string options are attribute options, and will be undone
@@ -1458,10 +1430,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
|| option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|| option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
|| enum_opts_set.x_ix86_fpmath
- || enum_opts_set.x_prefer_vector_width_type
- || (!(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags2_explicit
- & OPTION_MASK_ISA2_AVX10_1_256)))
+ || enum_opts_set.x_prefer_vector_width_type)
{
/* If we are using the default tune= or arch=, undo the string assigned,
and use the default. */
@@ -2025,7 +1994,7 @@ ix86_option_override_internal (bool main_args_p,
struct gcc_options *opts_set)
{
unsigned int i;
- unsigned HOST_WIDE_INT ix86_arch_mask, avx512_isa_flags, avx512_isa_flags2;
+ unsigned HOST_WIDE_INT ix86_arch_mask;
const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
/* -mrecip options. */
@@ -2044,15 +2013,6 @@ ix86_option_override_internal (bool main_args_p,
{ "vec-sqrt", RECIP_MASK_VEC_SQRT },
};
- avx512_isa_flags = OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
- | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
- | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
- | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
- | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
- | OPTION_MASK_ISA_AVX512BITALG;
- avx512_isa_flags2 = OPTION_MASK_ISA2_AVX512FP16
- | OPTION_MASK_ISA2_AVX512BF16;
-
/* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
@@ -2674,107 +2634,6 @@ ix86_option_override_internal (bool main_args_p,
&= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
& ~opts->x_ix86_isa_flags_explicit);
- /* Emit a warning if AVX10.1 options is used with AVX512/EVEX512 options except
- for the following option combinations:
- 1. Both AVX10.1-512 and AVX512 with 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 2. Both AVX10.1-256 and AVX512 w/o 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 3. Both AVX10.1 and AVX512 are disabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2))
- {
- if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2)
- & ((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-evex512%> or %<-mno-avx512XXX%> cannot disable "
- "AVX10 instructions when AVX10.1-512 is available in GCC 15, "
- "behavior will change to it will disable that part of "
- "AVX512 instructions since GCC 16");
- }
- else if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- if (TARGET_EVEX512_P (opts->x_ix86_isa_flags2)
- && (OPTION_MASK_ISA2_EVEX512 & opts->x_ix86_isa_flags2_explicit))
- {
- if (!TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- || !(OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- /* We should not emit 512 bit instructions under AVX10.1-256
- when EVEX512 is enabled w/o any AVX512 features enabled.
- Disable EVEX512 bit for this. */
- warning (0, "Using %<-mevex512%> without any AVX512 features "
- "enabled together with AVX10.1 only will not enable "
- "any AVX512 or AVX10.1-512 features, using 256 as "
- "max vector size");
- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_EVEX512;
- }
- else
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, "
- "using 512 as max vector size");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(OPTION_MASK_ISA2_EVEX512
- & opts->x_ix86_isa_flags2_explicit))
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, using "
- "512 as max vector size");
- else if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~(avx512_isa_flags2 & opts->x_ix86_isa_flags2)
- & (avx512_isa_flags2
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-avx512XXX%> cannot disable AVX10 instructions "
- "when AVX10 is available in GCC 15, behavior will change "
- "to it will disable that part of AVX512 instructions since "
- "GCC 16");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- if (opts->x_ix86_no_avx10_1_explicit
- && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1)
- & opts->x_ix86_isa_flags2_explicit))
- {
- warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> cannot disable "
- "AVX512 instructions when %<-mavx512XXX%> in GCC 15, "
- "behavior will change to it will disable all the "
- "instructions in GCC 16");
- /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is
- disabled. */
- if (OPTION_MASK_ISA2_AVX10_1_256 & opts->x_ix86_isa_flags2_explicit)
- {
- opts->x_ix86_isa_flags = (~avx512_isa_flags
- & opts->x_ix86_isa_flags)
- | (avx512_isa_flags & opts->x_ix86_isa_flags
- & opts->x_ix86_isa_flags_explicit);
- opts->x_ix86_isa_flags2 = (~avx512_isa_flags2
- & opts->x_ix86_isa_flags2)
- | (avx512_isa_flags2 & opts->x_ix86_isa_flags2
- & opts->x_ix86_isa_flags2_explicit);
- }
- }
- }
-
- /* Set EVEX512 if one of the following conditions meets:
- 1. AVX512 is enabled while EVEX512 is not explicitly set/unset.
- 2. AVX10.1-512 is enabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)
- || (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
- /* Enable all AVX512 related ISAs when AVX10.1 is enabled. */
- if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- opts->x_ix86_isa_flags |= avx512_isa_flags;
- opts->x_ix86_isa_flags2 |= avx512_isa_flags2;
- }
-
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
@@ -2828,8 +2687,8 @@ ix86_option_override_internal (bool main_args_p,
if (flag_nop_mcount)
error ("%<-mnop-mcount%> is not compatible with this target");
#endif
- if (flag_nop_mcount && flag_pic)
- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>");
+ if (flag_nop_mcount && flag_pic && !flag_plt)
+ error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>");
/* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM_P (opts->x_target_flags)
@@ -3049,8 +2908,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_move_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_move_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3076,8 +2934,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_store_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_store_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_store_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3374,13 +3231,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
case 'e':
if (TARGET_PREFER_AVX256)
{
- if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512,prefer-vector-width=512";
+ if (!TARGET_AVX512F)
+ str = "avx512f,prefer-vector-width=512";
else
str = "prefer-vector-width=512";
}
- else if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512";
+ else if (!TARGET_AVX512F)
+ str = "avx512f";
break;
default:
gcc_unreachable ();
@@ -3420,19 +3277,21 @@ ix86_set_func_type (tree fndecl)
interrupt function in this case. */
enum call_saved_registers_type no_callee_saved_registers
= TYPE_DEFAULT_CALL_SAVED_REGISTERS;
- if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ if (lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ no_callee_saved_registers = TYPE_PRESERVE_NONE;
+ else if ((lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ || (ix86_noreturn_no_callee_saved_registers
+ && TREE_THIS_VOLATILE (fndecl)
+ && optimize
+ && !optimize_debug
+ && (TREE_NOTHROW (fndecl) || !flag_exceptions)
+ && !lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
+ && !lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS;
- else if (ix86_noreturn_no_callee_saved_registers
- && TREE_THIS_VOLATILE (fndecl)
- && optimize
- && !optimize_debug
- && (TREE_NOTHROW (fndecl) || !flag_exceptions)
- && !lookup_attribute ("interrupt",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
- && !lookup_attribute ("no_caller_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
- no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP;
if (cfun->machine->func_type == TYPE_UNKNOWN)
{
@@ -3444,9 +3303,16 @@ ix86_set_func_type (tree fndecl)
"interrupt and naked attributes are not compatible");
if (no_callee_saved_registers)
- error_at (DECL_SOURCE_LOCATION (fndecl),
- "%qs and %qs attributes are not compatible",
- "interrupt", "no_callee_saved_registers");
+ {
+ const char *attr;
+ if (no_callee_saved_registers == TYPE_PRESERVE_NONE)
+ attr = "preserve_none";
+ else
+ attr = "no_callee_saved_registers";
+ error_at (DECL_SOURCE_LOCATION (fndecl),
+ "%qs and %qs attributes are not compatible",
+ "interrupt", attr);
+ }
int nargs = 0;
for (tree arg = DECL_ARGUMENTS (fndecl);
@@ -3468,21 +3334,13 @@ ix86_set_func_type (tree fndecl)
else
{
cfun->machine->func_type = TYPE_NORMAL;
- if (lookup_attribute ("no_caller_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ if (no_callee_saved_registers)
+ cfun->machine->call_saved_registers
+ = no_callee_saved_registers;
+ else if (lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
cfun->machine->call_saved_registers
= TYPE_NO_CALLER_SAVED_REGISTERS;
- if (no_callee_saved_registers)
- {
- if (cfun->machine->call_saved_registers
- == TYPE_NO_CALLER_SAVED_REGISTERS)
- error_at (DECL_SOURCE_LOCATION (fndecl),
- "%qs and %qs attributes are not compatible",
- "no_caller_saved_registers",
- "no_callee_saved_registers");
- cfun->machine->call_saved_registers
- = no_callee_saved_registers;
- }
}
}
}
@@ -3671,11 +3529,21 @@ ix86_set_current_function (tree fndecl)
|| (cfun->machine->call_saved_registers
== TYPE_NO_CALLER_SAVED_REGISTERS))
{
- /* Don't allow SSE, MMX nor x87 instructions since they
- may change processor state. */
+ /* Don't allow AVX, AVX512, MMX nor x87 instructions since they
+ may change processor state. Don't allow SSE instructions in
+ exception/interrupt service routines. */
const char *isa;
if (TARGET_SSE)
- isa = "SSE";
+ {
+ if (TARGET_AVX512F)
+ isa = "AVX512";
+ else if (TARGET_AVX)
+ isa = "AVX";
+ else if (cfun->machine->func_type != TYPE_NORMAL)
+ isa = "SSE";
+ else
+ isa = NULL;
+ }
else if (TARGET_MMX)
isa = "MMX/3Dnow";
else if (TARGET_80387)
@@ -4100,9 +3968,50 @@ ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
}
static tree
-ix86_handle_call_saved_registers_attribute (tree *, tree, tree,
+ix86_handle_call_saved_registers_attribute (tree *node, tree name, tree,
int, bool *)
{
+ const char *attr1 = nullptr;
+ const char *attr2 = nullptr;
+
+ if (is_attribute_p ("no_callee_saved_registers", name))
+ {
+ /* Disallow preserve_none and no_caller_saved_registers
+ attributes. */
+ attr1 = "no_callee_saved_registers";
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node)))
+ attr2 = "preserve_none";
+ else if (lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_caller_saved_registers";
+ }
+ else if (is_attribute_p ("no_caller_saved_registers", name))
+ {
+ /* Disallow preserve_none and no_callee_saved_registers
+ attributes. */
+ attr1 = "no_caller_saved_registers";
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node)))
+ attr2 = "preserve_none";
+ else if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_callee_saved_registers";
+ }
+ else if (is_attribute_p ("preserve_none", name))
+ {
+ /* Disallow no_callee_saved_registers and no_caller_saved_registers
+ attributes. */
+ attr1 = "preserve_none";
+ if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_caller_saved_registers";
+ else if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_callee_saved_registers";
+ }
+
+ if (attr2)
+ error ("%qs and %qs attributes are not compatible", attr1, attr2);
+
return NULL_TREE;
}
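With the handler above, combining these attributes is diagnosed when the second one is processed. A hypothetical translation unit (diagnostic wording paraphrased from the error call above):

/* The second attribute triggers "... attributes are not compatible".  */
__attribute__((no_callee_saved_registers))
__attribute__((preserve_none))
void bad_combo (void);

__attribute__((preserve_none))
void ok_alone (void);   /* preserve_none by itself is accepted.  */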
@@ -4264,6 +4173,8 @@ static const attribute_spec ix86_gnu_attributes[] =
ix86_handle_interrupt_attribute, NULL },
{ "no_caller_saved_registers", 0, 0, false, true, true, false,
ix86_handle_call_saved_registers_attribute, NULL },
+ { "preserve_none", 0, 0, false, true, true, true,
+ ix86_handle_call_saved_registers_attribute, NULL },
{ "no_callee_saved_registers", 0, 0, false, true, true, true,
ix86_handle_call_saved_registers_attribute, NULL },
{ "naked", 0, 0, true, false, false, false,
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 39f8bc6..06f0288 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,5 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bea3fd4..69bc0ee 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -186,6 +186,7 @@ extern void ix86_expand_v2di_ashiftrt (rtx[]);
extern rtx ix86_replace_reg_with_reg (rtx, rtx, rtx);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
+extern bool ix86_check_movs (rtx, int);
extern bool ix86_check_no_addr_space (rtx);
extern void ix86_split_idivmod (machine_mode, rtx[], bool);
extern bool ix86_hardreg_mov_ok (rtx, rtx);
@@ -198,6 +199,7 @@ extern int ix86_attr_length_vex_default (rtx_insn *, bool, bool);
extern rtx ix86_libcall_value (machine_mode);
extern bool ix86_function_arg_regno_p (int);
extern void ix86_asm_output_function_label (FILE *, const char *, tree);
+extern void ix86_asm_output_labelref (FILE *, const char *, const char *);
extern void ix86_call_abi_override (const_tree);
extern int ix86_reg_parm_stack_space (const_tree);
@@ -280,6 +282,7 @@ extern tree ix86_valid_target_attribute_tree (tree, tree,
struct gcc_options *,
struct gcc_options *, bool);
extern unsigned int ix86_get_callcvt (const_tree);
+extern bool ix86_type_no_callee_saved_registers_p (const_tree);
#endif
@@ -427,12 +430,21 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
+extern rtl_opt_pass *make_pass_remove_redundant_vector_load
+ (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
extern bool ix86_has_no_direct_extern_access;
extern bool ix86_rpad_gate ();
+extern sbitmap ix86_get_separate_components (void);
+extern sbitmap ix86_components_for_bb (basic_block);
+extern void ix86_disqualify_components (sbitmap, edge, sbitmap, bool);
+extern void ix86_emit_prologue_components (sbitmap);
+extern void ix86_emit_epilogue_components (sbitmap);
+extern void ix86_set_handled_components (sbitmap);
+
/* In i386-expand.cc. */
bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
HOST_WIDE_INT*);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b172f71..b64175d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -334,6 +335,14 @@ static int const x86_64_ms_abi_int_parameter_registers[4] =
CX_REG, DX_REG, R8_REG, R9_REG
};
+/* Similar to Clang's preserve_none function parameter passing.
+ NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p. */
+
+static int const x86_64_preserve_none_int_parameter_registers[6] =
+{
+ R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
+};
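A sketch of what the table above implies for argument passing, assuming a compiler with this change targeting x86-64: the first integer arguments of a preserve_none function arrive in r12, r13, r14, r15, rdi and rsi rather than the default rdi, rsi, rdx, rcx, r8 and r9.

/* Hypothetical function; the comment records the expected registers.  */
__attribute__((preserve_none)) long
callee (long a, long b, long c, long d, long e, long f)
{
  /* a..f are expected in %r12, %r13, %r14, %r15, %rdi, %rsi.  */
  return a + b + c + d + e + f;
}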
+
static int const x86_64_int_return_registers[4] =
{
AX_REG, DX_REG, DI_REG, SI_REG
@@ -459,7 +468,8 @@ int ix86_arch_specified;
red-zone.
NB: Don't use red-zone for functions with no_caller_saved_registers
- and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
+ and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
+ for 31 GPRs or 15 GPRs + 16 XMM registers.
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
@@ -470,7 +480,7 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
- && (!TARGET_APX_EGPR
+ && ((!TARGET_APX_EGPR && !TARGET_SSE)
|| (cfun->machine->call_saved_registers
!= TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
@@ -897,6 +907,18 @@ x86_64_elf_unique_section (tree decl, int reloc)
default_unique_section (decl, reloc);
}
+/* Return true if TYPE has no_callee_saved_registers or preserve_none
+ attribute. */
+
+bool
+ix86_type_no_callee_saved_registers_p (const_tree type)
+{
+ return (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (type)) != NULL
+ || lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type)) != NULL);
+}
+
#ifdef COMMON_ASM_OP
#ifndef LARGECOMM_SECTION_ASM_OP
@@ -1018,11 +1040,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
/* Sibling call isn't OK if callee has no callee-saved registers
and the calling function has callee-saved registers. */
- if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
- && (cfun->machine->call_saved_registers
- != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
- && lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type)))
+ if ((cfun->machine->call_saved_registers
+ != TYPE_NO_CALLEE_SAVED_REGISTERS)
+ && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
+ && ix86_type_no_callee_saved_registers_p (type))
return false;
/* If outgoing reg parm stack space changes, we cannot do sibcall. */
@@ -1187,10 +1208,16 @@ ix86_comp_type_attributes (const_tree type1, const_tree type2)
!= ix86_function_regparm (type2, NULL))
return 0;
- if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type1))
- != lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type2)))
+ if (ix86_type_no_callee_saved_registers_p (type1)
+ != ix86_type_no_callee_saved_registers_p (type2))
+ return 0;
+
+ /* The preserve_none attribute uses a different calling convention
+ and is only supported for 64-bit. */
+ if (TARGET_64BIT
+ && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
+ != lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type2))))
return 0;
return 1;
@@ -1552,7 +1579,10 @@ ix86_function_arg_regno_p (int regno)
if (call_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (call_abi == MS_ABI)
+ if (cfun
+ && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (call_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
@@ -1715,6 +1745,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
}
}
+/* Output a user-defined label. In AT&T syntax, registers are prefixed
+ with %, so labels require no punctuation. In Intel syntax, registers
+ are unprefixed, so labels may clash with registers or other operators,
+ and require quoting. */
+void
+ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
+{
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, "%s%s", prefix, label);
+ else
+ fprintf (file, "\"%s%s\"", prefix, label);
+}
+
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also
ix86_conditional_register_usage for more details. */
@@ -1794,8 +1837,7 @@ ix86_init_pic_reg (void)
add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1822,6 +1864,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
memset (cum, 0, sizeof (*cum));
+ tree preserve_none_type;
if (fndecl)
{
target = cgraph_node::get (fndecl);
@@ -1830,12 +1873,24 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
target = target->function_symbol ();
local_info_node = cgraph_node::local_info_node (target->decl);
cum->call_abi = ix86_function_abi (target->decl);
+ preserve_none_type = TREE_TYPE (target->decl);
}
else
- cum->call_abi = ix86_function_abi (fndecl);
+ {
+ cum->call_abi = ix86_function_abi (fndecl);
+ preserve_none_type = TREE_TYPE (fndecl);
+ }
}
else
- cum->call_abi = ix86_function_type_abi (fntype);
+ {
+ cum->call_abi = ix86_function_type_abi (fntype);
+ preserve_none_type = fntype;
+ }
+ cum->preserve_none_abi
+ = (preserve_none_type
+ && (lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (preserve_none_type))
+ != nullptr));
cum->caller = caller;
@@ -1997,8 +2052,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
{
- if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
- && !TARGET_IAMCU)
+ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
{
static bool warnedavx512f;
static bool warnedavx512f_ret;
@@ -3409,9 +3463,15 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
break;
}
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
return construct_container (mode, orig_mode, type, 0, cum->nregs,
cum->sse_nregs,
- &x86_64_int_parameter_registers [cum->regno],
+ &parm_regs[cum->regno],
cum->sse_regno);
}
@@ -4421,7 +4481,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* AVX512F values are returned in ZMM0 if available. */
if (size == 64)
- return !TARGET_AVX512F || !TARGET_EVEX512;
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -4576,6 +4636,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (max > X86_64_REGPARM_MAX)
max = X86_64_REGPARM_MAX;
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (word_mode,
@@ -4583,8 +4649,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
emit_move_insn (mem,
- gen_rtx_REG (word_mode,
- x86_64_int_parameter_registers[i]));
+ gen_rtx_REG (word_mode, parm_regs[i]));
}
if (ix86_varargs_fpr_size)
@@ -4738,8 +4803,7 @@ ix86_va_start (tree valist, rtx nextarg)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -5179,6 +5243,27 @@ ix86_check_movabs (rtx insn, int opnum)
return volatile_ok || !MEM_VOLATILE_P (mem);
}
+/* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */
+bool
+ix86_check_movs (rtx insn, int idx)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ rtx set = XVECEXP (pat, 0, idx);
+ gcc_assert (GET_CODE (set) == SET);
+
+ rtx dst = SET_DEST (set);
+ gcc_assert (MEM_P (dst));
+
+ rtx src = SET_SRC (set);
+ gcc_assert (MEM_P (src));
+
+ return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
+ || Pmode == word_mode));
+}
+
/* Return false if INSN contains a MEM with a non-default address space. */
bool
ix86_check_no_addr_space (rtx insn)
@@ -5355,7 +5440,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
switch (GET_MODE_SIZE (mode))
{
case 64:
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
return 2;
break;
case 32:
@@ -5408,10 +5493,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
return "vpxor\t%x0, %x0, %x0";
@@ -5427,19 +5510,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorpd\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorpd\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorpd\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxorq\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxorq\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxorq\t%g0, %g0, %g0";
}
}
return "vxorpd\t%x0, %x0, %x0";
@@ -5456,19 +5535,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorps\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorps\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorps\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
}
return "vxorps\t%x0, %x0, %x0";
@@ -5489,7 +5564,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
case MODE_XI:
case MODE_V8DF:
case MODE_V16SF:
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
case MODE_OI:
@@ -5505,10 +5580,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
- else if (TARGET_EVEX512)
- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
else
- gcc_unreachable ();
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
}
return (TARGET_AVX
? "vpcmpeqd\t%0, %0, %0"
@@ -5522,7 +5595,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (GET_MODE_SIZE (mode) == 64)
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%t0, %t0, %t0";
}
else if (GET_MODE_SIZE (mode) == 32)
@@ -5534,7 +5607,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
}
else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%x0, %x0, %x0";
}
@@ -5645,8 +5718,6 @@ ix86_get_ssemov (rtx *operands, unsigned size,
|| memory_operand (operands[1], mode))
gcc_unreachable ();
size = 64;
- /* We need TARGET_EVEX512 to move into zmm register. */
- gcc_assert (TARGET_EVEX512);
switch (type)
{
case opcode_int:
@@ -5685,7 +5756,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -5727,7 +5798,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu8"
: "%vmovdqu")
: "%vmovdqa");
@@ -5747,7 +5818,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -6700,9 +6771,7 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
|| !frame_pointer_needed));
case TYPE_NO_CALLEE_SAVED_REGISTERS:
- return false;
-
- case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
+ case TYPE_PRESERVE_NONE:
if (regno != HARD_FRAME_POINTER_REGNUM)
return false;
break;
@@ -6779,7 +6848,9 @@ ix86_nsaved_sseregs (void)
int nregs = 0;
int regno;
- if (!TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT_MS_ABI
+ && (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
return 0;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
@@ -6887,6 +6958,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
&& (nregs + aligned) >= 3;
}
+/* Check if push/pop should be used to save/restore registers. */
+static bool
+save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
+{
+ return ((!to_allocate && cfun->machine->frame.nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ /* If stack clash probing needs a loop, then it needs a
+ scratch register. But the returned register is only guaranteed
+ to be safe to use after register saves are complete. So if
+ stack clash protections are enabled and the allocated frame is
+ larger than the probe interval, then use pushes to save
+ callee saved registers. */
+ || (flag_stack_clash_protection
+ && !ix86_target_stack_probe ()
+ && to_allocate > get_probe_interval ()));
+}
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
@@ -6967,12 +7058,18 @@ ix86_compute_frame_layout (void)
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
- /* The only ABI saving SSE regs should be 64-bit ms_abi. */
- gcc_assert (TARGET_64BIT || !frame->nsseregs);
+  /* The only ABI saving SSE regs should be 64-bit ms_abi or one with the
+     no_caller_saved_registers attribute.  */
+ gcc_assert (TARGET_64BIT
+ || (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
if (TARGET_64BIT && m->call_ms2sysv)
{
gcc_assert (stack_alignment_needed >= 16);
- gcc_assert (!frame->nsseregs);
+ gcc_assert ((cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
}
/* For SEH we have to limit the amount of code movement into the prologue.
@@ -7171,20 +7268,7 @@ ix86_compute_frame_layout (void)
/* Size prologue needs to allocate. */
to_allocate = offset - frame->sse_reg_save_offset;
- if ((!to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
- /* If static stack checking is enabled and done with probes,
- the registers need to be saved before allocating the frame. */
- || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- /* If stack clash probing needs a loop, then it needs a
- scratch register. But the returned register is only guaranteed
- to be safe to use after register saves are complete. So if
- stack clash protections are enabled and the allocated frame is
- larger than the probe interval, then use pushes to save
- callee saved registers. */
- || (flag_stack_clash_protection
- && !ix86_target_stack_probe ()
- && to_allocate > get_probe_interval ()))
+ if (save_regs_using_push_pop (to_allocate))
frame->save_regs_using_mov = false;
if (ix86_using_red_zone ()
@@ -7642,7 +7726,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+	/* Skip registers already processed by separate shrink-wrapping.  */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -7735,8 +7821,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
add_frame_related_expr = true;
}
- insn = emit_insn (gen_pro_epilogue_adjust_stack_add
- (Pmode, dest, src, addend));
+  /* Separate shrink-wrapping may insert the prologue between TEST and JMP.
+     In order not to affect EFlags, emit the add without clobbering them.  */
+ if (crtl->shrink_wrapped_separate)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
+ (Pmode, dest, src, addend));
+ else
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add
+ (Pmode, dest, src, addend));
+
if (style >= 0)
ix86_add_queued_cfa_restore_notes (insn);
@@ -7920,6 +8013,15 @@ ix86_update_stack_boundary (void)
if (ix86_tls_descriptor_calls_expanded_in_cfun
&& crtl->preferred_stack_boundary < 128)
crtl->preferred_stack_boundary = 128;
+
+  /* For the 32-bit MS ABI, both the incoming and preferred stack boundaries
+     are 32 bits, but if force_align_arg_pointer is specified, prefer
+     128 bits for backward compatibility, as the documentation also
+     suggests.  */
+ if (lookup_attribute ("force_align_arg_pointer",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
+ && crtl->preferred_stack_boundary < 128)
+ crtl->preferred_stack_boundary = 128;
}
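A hedged usage sketch (not part of the patch) of the attribute this hunk special-cases: on 32-bit MS targets the incoming stack may only be 4-byte aligned, and force_align_arg_pointer makes the function realign the stack on entry, which now also raises the preferred boundary to 128 bits.

    __attribute__ ((force_align_arg_pointer))
    void f (void)
    {
      float buf[4] __attribute__ ((aligned (16)));  /* 16-byte stack slot is honoured */
      buf[0] = 1.0f;
    }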
/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
@@ -7950,8 +8052,7 @@ ix86_get_drap_rtx (void)
start_sequence ();
drap_vreg = copy_to_reg (arg_ptr);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
@@ -8472,6 +8573,128 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+  /* This insn may reference a stack slot.  Update the maximum stack slot
+     alignment if the memory is referenced by the stack access register.  */
+ stack_access_data *p = (stack_access_data *) data;
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ {
+ auto op = *iter;
+ if (MEM_P (op))
+ {
+ if (reg_mentioned_p (p->reg, XEXP (op, 0)))
+ {
+ unsigned int alignment = MEM_ALIGN (op);
+
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
+ break;
+ }
+ else
+ iter.skip_subrtxes ();
+ }
+ }
+}
+
+/* Helper function for ix86_find_all_reg_uses. */
+
+static void
+ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
+ rtx set, unsigned int regno,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+
+ if (!REG_P (dest))
+ return;
+
+ /* Reject non-Pmode modes. */
+ if (GET_MODE (dest) != Pmode)
+ return;
+
+ unsigned int dst_regno = REGNO (dest);
+
+ if (TEST_HARD_REG_BIT (regset, dst_regno))
+ return;
+
+ const_rtx src = SET_SRC (set);
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, src, ALL)
+ {
+ auto op = *iter;
+
+ if (MEM_P (op))
+ iter.skip_subrtxes ();
+
+ if (REG_P (op) && REGNO (op) == regno)
+ {
+ /* Add this register to register set. */
+ add_to_hard_reg_set (&regset, Pmode, dst_regno);
+ bitmap_set_bit (worklist, dst_regno);
+ break;
+ }
+ }
+}
+
+/* Find all registers defined from register REGNO.  */
+
+static void
+ix86_find_all_reg_uses (HARD_REG_SET &regset,
+ unsigned int regno, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ unsigned int ref_regno = DF_REF_REGNO (ref);
+
+ rtx set = single_set (insn);
+ if (set)
+ {
+ ix86_find_all_reg_uses_1 (regset, set,
+ ref_regno, worklist);
+ continue;
+ }
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_uses_1 (regset, exp,
+ ref_regno, worklist);
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8490,10 +8713,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8505,27 +8724,67 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* A stack slot can be accessed by the stack pointer, the frame pointer,
+     or registers defined from them.  */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int regno;
+
+ do
+ {
+ regno = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ stack_access_data data;
+
+ data.stack_alignment = &stack_alignment;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
@@ -9035,11 +9294,22 @@ ix86_expand_prologue (void)
doing this if we have to probe the stack; at least on x86_64 the
stack probe can turn into a call that clobbers a red zone location. */
else if (ix86_using_red_zone ()
- && (! TARGET_STACK_PROBE
- || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
{
+ HOST_WIDE_INT allocate_offset;
+ if (crtl->shrink_wrapped_separate)
+ {
+ allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
+
+ /* Adjust the total offset at the beginning of the function. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (allocate_offset), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
+ }
+
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- cfun->machine->red_zone_used = true;
int_registers_saved = true;
}
}
@@ -9617,30 +9887,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
{
- rtx reg = gen_rtx_REG (word_mode, regno);
- rtx mem;
- rtx_insn *insn;
-
- mem = choose_baseaddr (cfa_offset, NULL);
- mem = gen_frame_mem (word_mode, mem);
- insn = emit_move_insn (reg, mem);
- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+      /* Skip registers already processed by separate shrink-wrapping.  */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
{
- /* Previously we'd represented the CFA as an expression
- like *(%ebp - 8). We've just popped that value from
- the stack, which means we need to reset the CFA to
- the drap register. This will remain until we restore
- the stack pointer. */
- add_reg_note (insn, REG_CFA_DEF_CFA, reg);
- RTX_FRAME_RELATED_P (insn) = 1;
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
- /* This means that the DRAP register is valid for addressing. */
- m->fs.drap_valid = true;
- }
- else
- ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+ if (m->fs.cfa_reg == crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ }
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -9919,10 +10194,11 @@ ix86_expand_epilogue (int style)
less work than reloading sp and popping the register. */
else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
restore_regs_via_mov = true;
- else if (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1
- || m->fs.sp_offset != reg_save_offset))
+ else if (crtl->shrink_wrapped_separate
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != reg_save_offset)))
restore_regs_via_mov = true;
else if (frame_pointer_needed
&& !frame.nregs
@@ -9936,6 +10212,9 @@ ix86_expand_epilogue (int style)
else
restore_regs_via_mov = false;
+ if (crtl->shrink_wrapped_separate)
+ gcc_assert (restore_regs_via_mov);
+
if (restore_regs_via_mov || frame.nsseregs)
{
/* Ensure that the entire register save area is addressable via
@@ -9988,6 +10267,7 @@ ix86_expand_epilogue (int style)
gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
gcc_assert (!crtl->drap_reg);
gcc_assert (!frame.nregs);
+ gcc_assert (!crtl->shrink_wrapped_separate);
}
else if (restore_regs_via_mov)
{
@@ -10002,6 +10282,8 @@ ix86_expand_epilogue (int style)
rtx sa = EH_RETURN_STACKADJ_RTX;
rtx_insn *insn;
+ gcc_assert (!crtl->shrink_wrapped_separate);
+
/* Stack realignment doesn't work with eh_return. */
if (crtl->stack_realign_needed)
sorry ("Stack realignment not supported with "
@@ -11183,6 +11465,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
x = XVECEXP (x, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (x, 0, 0);
+ return GET_CODE (x) == SYMBOL_REF;
default:
return false;
}
@@ -11230,7 +11515,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case E_OImode:
case E_XImode:
if (!standard_sse_constant_p (x, mode)
- && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
+ && GET_MODE_SIZE (TARGET_AVX512F
? XImode
: (TARGET_AVX
? OImode
@@ -11319,6 +11604,9 @@ legitimate_pic_operand_p (rtx x)
x = XVECEXP (inner, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (inner, 0, 0);
+ return GET_CODE (x) == SYMBOL_REF;
case UNSPEC_MACHOPIC_OFFSET:
return legitimate_pic_address_disp_p (x);
default:
@@ -11499,6 +11787,9 @@ legitimate_pic_address_disp_p (rtx disp)
disp = XVECEXP (disp, 0, 0);
return (GET_CODE (disp) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ disp = XVECEXP (disp, 0, 0);
+ return GET_CODE (disp) == SYMBOL_REF;
}
return false;
@@ -11776,6 +12067,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
case UNSPEC_INDNTPOFF:
case UNSPEC_NTPOFF:
case UNSPEC_DTPOFF:
+ case UNSPEC_SECREL32:
break;
default:
@@ -11801,7 +12093,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
|| GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
|| !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
|| (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
/* Non-constant pic memory reference. */
return false;
}
@@ -12125,6 +12418,24 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg)
return tp;
}
+/* Construct the SYMBOL_REF for the _tls_index symbol. */
+
+static GTY(()) rtx ix86_tls_index_symbol;
+
+#if TARGET_WIN32_TLS
+static rtx
+ix86_tls_index (void)
+{
+ if (!ix86_tls_index_symbol)
+ ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
+
+ if (flag_pic)
+ return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL));
+ else
+ return ix86_tls_index_symbol;
+}
+#endif
+
/* Construct the SYMBOL_REF for the tls_get_addr function. */
static GTY(()) rtx ix86_tls_symbol;
@@ -12183,6 +12494,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
machine_mode tp_mode = Pmode;
int type;
+#if TARGET_WIN32_TLS
+ off = gen_const_mem (SImode, ix86_tls_index ());
+ set_mem_alias_set (off, GOT_ALIAS_SET);
+
+ tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44));
+ set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG);
+
+ if (TARGET_64BIT)
+ off = convert_to_mode (Pmode, off, 1);
+
+ base = force_reg (Pmode, off);
+ tp = copy_to_mode_reg (Pmode, tp);
+
+ tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD))));
+ set_mem_alias_set (tp, GOT_ALIAS_SET);
+
+ base = force_reg (Pmode, tp);
+
+ return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32)));
+#else
/* Fall back to global dynamic model if tool chain cannot support local
dynamic. */
if (TARGET_SUN_TLS && !TARGET_64BIT
@@ -12231,13 +12562,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
start_sequence ();
emit_call_insn
- (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
- insns = get_insns ();
- end_sequence ();
+ (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
+ insns = end_sequence ();
if (GET_MODE (x) != Pmode)
x = gen_rtx_ZERO_EXTEND (Pmode, x);
@@ -12285,14 +12616,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
rtx eqv;
start_sequence ();
emit_call_insn
- (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
- insns = get_insns ();
- end_sequence ();
+ (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
+ insns = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
@@ -12405,6 +12736,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
}
return dest;
+#endif
}
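A rough illustration (not from the patch) of the access pattern the TARGET_WIN32_TLS branch above lowers: a plain __thread variable is reached through the module's _tls_index, the thread's TLS array in the TEB (gs:[88] on x86-64, fs:[44] on ia32) and a @secrel32 offset into the TLS section.

    __thread int counter;                    /* placed in the TLS section */
    int bump (void) { return ++counter; }    /* lowered via _tls_index + TEB slot + @secrel32 */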
/* Return true if the TLS address requires insn using integer registers.
@@ -12874,6 +13206,9 @@ output_pic_addr_const (FILE *file, rtx x, int code)
case UNSPEC_INDNTPOFF:
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
putc ('-', file);
@@ -12899,7 +13234,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
fputs (ASM_LONG, file);
output_addr_const (file, x);
+#if TARGET_WIN32_TLS
+ fputs ("@secrel32", file);
+#else
fputs ("@dtpoff", file);
+#endif
switch (size)
{
case 4:
@@ -13558,10 +13897,11 @@ print_reg (rtx x, int code, FILE *file)
H -- print a memory address offset by 8; used for sse high-parts
Y -- print condition for XOP pcom* instruction.
V -- print naked full integer register name without %.
+ v -- print segment override prefix
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
~ -- print "i" if TARGET_AVX2, "f" otherwise.
- ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+ ^ -- print addr32 prefix if Pmode != word_mode
M -- print addr32 prefix for TARGET_X32 with VSIB address.
! -- print NOTRACK prefix for jxx/call/ret instructions if required.
N -- print maskz if it's constant 0 operand.
@@ -14063,6 +14403,28 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
+ case 'v':
+ if (MEM_P (x))
+ {
+ switch (MEM_ADDR_SPACE (x))
+ {
+ case ADDR_SPACE_GENERIC:
+ break;
+ case ADDR_SPACE_SEG_FS:
+ fputs ("fs ", file);
+ break;
+ case ADDR_SPACE_SEG_GS:
+ fputs ("gs ", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ output_operand_lossage ("operand is not a memory reference, "
+ "invalid operand code 'v'");
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
@@ -14137,7 +14499,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
case '^':
- if (TARGET_64BIT && Pmode != word_mode)
+ if (Pmode != word_mode)
fputs ("addr32 ", file);
return;
@@ -14652,6 +15014,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, op);
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ output_addr_const (file, op);
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
output_addr_const (file, op);
@@ -17904,9 +18270,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
if (cum->decl && !TREE_PUBLIC (cum->decl))
return;
- const_tree ctx = get_ultimate_context (cum->decl);
- if (ctx != NULL_TREE
- && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
+ tree decl = cum->decl;
+ if (!decl)
+ /* If we don't know the target, look at the current TU. */
+ decl = current_function_decl;
+
+ const_tree ctx = get_ultimate_context (decl);
+ if (ctx == NULL_TREE
+ || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
return;
/* If the actual size of the type is zero, then there is no change
@@ -20043,14 +20414,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
{
bool si;
enum ix86_builtins code;
- const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
if (!TARGET_AVX512F)
return NULL_TREE;
- if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
- return NULL_TREE;
-
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
? !TARGET_USE_SCATTER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
@@ -20793,7 +21160,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
return true;
/* x87 registers can't do subreg at all, as all values are reformatted
- to extended precision. */
+ to extended precision.
+
+     ??? The middle-end queries mode changes for ALL_REGS and this makes
+     vec_series_lowpart_p always return false.  We probably should
+     restrict this to modes supported by i387 and check if it is enabled.  */
if (MAYBE_FLOAT_CLASS_P (regclass))
return false;
@@ -21168,7 +21539,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
- any of 512-bit wide vector mode
- any scalar mode. */
if (TARGET_AVX512F
- && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
+ && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
@@ -21339,19 +21710,20 @@ ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
return mode1 == SFmode;
/* If MODE2 is only appropriate for an SSE register, then tie with
- any other mode acceptable to SSE registers. */
- if (GET_MODE_SIZE (mode2) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 16
+ any vector modes or scalar floating point modes acceptable to SSE
+ registers, excluding scalar integer modes with SUBREG:
+     (subreg:QI (reg:TI 99) 0)
+     (subreg:HI (reg:TI 99) 0)
+     (subreg:SI (reg:TI 99) 0)
+     (subreg:DI (reg:TI 99) 0)
+ to avoid unnecessary move from SSE register to integer register.
+ */
+ if (GET_MODE_SIZE (mode2) >= 16
+ && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
+ || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
+ && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 16
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
+ return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
/* If MODE2 is appropriate for an MMX register, then tie
with any other mode acceptable to MMX registers. */
@@ -21409,7 +21781,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
@@ -21470,7 +21842,7 @@ ix86_widen_mult_cost (const struct processor_costs *cost,
/* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
require extra 4 mul, 4 add, 4 cmp and 2 shift. */
if (!TARGET_SSE4_1 && !uns_p)
- extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
+ extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
+ cost->sse_op * 2;
/* Fallthru. */
case V4DImode:
@@ -21520,11 +21892,11 @@ ix86_multiplication_cost (const struct processor_costs *cost,
else if (TARGET_AVX2)
nops += 2;
else if (TARGET_XOP)
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
else
{
nops += 1;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
@@ -21543,13 +21915,13 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 2;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
else
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
@@ -21562,14 +21934,16 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[3] * 2;
+ /* 2 loads, so no division by 2. */
+ extra += COSTS_N_INSNS (cost->sse_load[3]);
}
goto do_qimode;
case V64QImode:
nmults = 2;
nops = 9;
- extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
+ /* 2 loads of each size, so no division by 2. */
+ extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
do_qimode:
return ix86_vec_cost (mode, cost->mulss * nmults
@@ -21662,7 +22036,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
if (constant_op1)
{
@@ -21693,7 +22067,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
shift with one insn set the cost to prefer paddb. */
if (constant_op1)
{
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
return ix86_vec_cost (mode, cost->sse_op) + extra;
}
else
@@ -21708,7 +22082,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ extra = COSTS_N_INSNS (mode == V16QImode
+ ? cost->sse_load[2]
+ : cost->sse_load[3]) / 2;
if (constant_op1)
{
@@ -21816,6 +22192,34 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+ if (size < 128)
+ return cost->cvtss2sd;
+ else if (size < 256)
+ {
+ if (TARGET_SSE_SPLIT_REGS)
+ return cost->cvtss2sd * size / 64;
+ return cost->cvtss2sd;
+ }
+ if (size < 512)
+ return cost->vcvtps2pd256;
+ else
+ return cost->vcvtps2pd512;
+}
+
+/* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
+
+static bool
+unspec_pcmp_p (rtx x)
+{
+ return GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -21833,9 +22237,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* Handling different vternlog variants. */
if ((GET_MODE_SIZE (mode) == 64
- ? (TARGET_AVX512F && TARGET_EVEX512)
+ ? TARGET_AVX512F
: (TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
&& GET_MODE_SIZE (mode) >= 16
&& outer_code_i == SET
&& ternlog_operand (x, mode))
@@ -22184,8 +22588,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (ior (not ...) ...) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22276,8 +22679,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* (and (not ...) (not ...)) can be a single insn in AVX512. */
if (GET_CODE (right) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22347,8 +22749,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (not (xor ...)) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22479,17 +22880,39 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+      /* x87 represents all values extended to 80 bits.  */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
+ return false;
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+ else
+ *total = cost->cvtsi2ss;
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtps2pi);
+ else
+ *total = cost->cvtss2si;
return false;
case ABS:
@@ -22550,13 +22973,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
- case VEC_SELECT:
case VEC_CONCAT:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
- same cost. */
+ same cost.
+	 ??? We should still recurse when computing the cost.  */
*total = cost->sse_op;
return true;
+
+ case VEC_SELECT:
+ /* Special case extracting lower part from the vector.
+	 This by itself needs no code and most SSE/AVX instructions have
+ packed and single forms where the single form may be represented
+ by such VEC_SELECT.
+
+ Use cost 1 (despite the fact that functionally equivalent SUBREG has
+ cost 0). Making VEC_SELECT completely free, for example instructs CSE
+ to forward propagate VEC_SELECT into
+
+ (set (reg eax) (reg src))
+
+	 which then prevents fwprop and combining.  See e.g.
+ gcc.target/i386/pr91103-1.c.
+
+	 ??? The rtvec_series_p test should be, for valid patterns, equivalent to
+	 vec_series_lowpart_p but is not, since the latter calls
+	 can_change_mode_class on ALL_REGS and this returns false since x87 does
+	 not support subregs at all.  */
+ if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+ *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+ outer_code, opno, speed) + 1;
+ else
+	/* ??? We should still recurse when computing the cost.  */
+ *total = cost->sse_op;
+ return true;
+
case VEC_DUPLICATE:
*total = rtx_cost (XEXP (x, 0),
GET_MODE (XEXP (x, 0)),
@@ -22569,13 +23020,87 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
case VEC_MERGE:
mask = XEXP (x, 2);
+ /* Scalar versions of SSE instructions may be represented as:
+
+ (vec_merge (vec_duplicate (operation ....))
+ (register or memory)
+ (const_int 1))
+
+	 In this case vec_merge and vec_duplicate are free.
+ Just recurse into operation and second operand. */
+ if (mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+ outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
/* This is masked instruction, assume the same cost,
as nonmasked variant. */
- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
- *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
+ else if (TARGET_AVX512F
+ && (register_operand (mask, GET_MODE (mask))
+		   /* Redundant cleanup of high bits for kmask with VL=2/4,
+		      i.e. (vec_merge op0, op1, (and op3 15)).  */
+ || (GET_CODE (mask) == AND
+ && register_operand (XEXP (mask, 0), GET_MODE (mask))
+ && CONST_INT_P (XEXP (mask, 1))
+ && ((INTVAL (XEXP (mask, 1)) == 3
+ && GET_MODE_NUNITS (mode) == 2)
+ || (INTVAL (XEXP (mask, 1)) == 15
+ && GET_MODE_NUNITS (mode) == 4)))))
+ {
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* Combination of the two above:
+
+ (vec_merge (vec_merge (vec_duplicate (operation ...))
+ (register or memory)
+ (reg:QI mask))
+ (register or memory)
+ (const_int 1))
+
+ i.e. avx512fp16_vcvtss2sh_mask. */
+ else if (TARGET_AVX512F
+ && mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_MERGE
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
+ && register_operand (XEXP (XEXP (x, 0), 2),
+ GET_MODE (XEXP (XEXP (x, 0), 2))))
+ {
+ *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* vcmp. */
+ else if (unspec_pcmp_p (mask)
+ || (GET_CODE (mask) == NOT
+ && unspec_pcmp_p (XEXP (mask, 0))))
+ {
+ rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
+ rtx unsop0 = XVECEXP (uns, 0, 0);
+ /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
+ cost the same as register.
+ This is used by avx_cmp<mode>3_ltint_not. */
+ if (GET_CODE (unsop0) == SUBREG)
+ unsop0 = XEXP (unsop0, 0);
+ if (GET_CODE (unsop0) == NOT)
+ unsop0 = XEXP (unsop0, 0);
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
+ + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
+ + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
+ + cost->sse_op;
+ return true;
+ }
else
*total = cost->sse_op;
- return true;
+ return false;
case MEM:
/* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
@@ -22592,7 +23117,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
/* An insn that accesses memory is slightly more expensive
- than one that does not. */
+ than one that does not. */
if (speed)
{
*total += 1;
@@ -22833,7 +23358,9 @@ x86_this_parameter (tree function)
{
const int *parm_regs;
- if (ix86_function_type_abi (type) == MS_ABI)
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (ix86_function_type_abi (type) == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
@@ -23159,13 +23686,21 @@ x86_field_alignment (tree type, int computed)
/* Print call to TARGET to FILE. */
static void
-x86_print_call_or_nop (FILE *file, const char *target)
+x86_print_call_or_nop (FILE *file, const char *target,
+ const char *label)
{
if (flag_nop_mcount || !strcmp (target, "nop"))
/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
- fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
+ label);
+ else if (!TARGET_PECOFF && flag_pic)
+ {
+ gcc_assert (flag_plt);
+
+ fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
+ }
else
- fprintf (file, "1:\tcall\t%s\n", target);
+ fprintf (file, "%s\tcall\t%s\n", label, target);
}
static bool
@@ -23250,6 +23785,13 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
const char *mcount_name = MCOUNT_NAME;
+ bool fentry_section_p
+ = (flag_record_mcount
+ || lookup_attribute ("fentry_section",
+ DECL_ATTRIBUTES (current_function_decl)));
+
+ const char *label = fentry_section_p ? "1:" : "";
+
if (current_fentry_name (&mcount_name))
;
else if (fentry_name)
@@ -23285,11 +23827,12 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
reg = legacy_reg;
}
if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
- "\tcall\t%s\n", reg, mcount_name, reg);
+ fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
+ "\tcall\t%s\n", label, reg, mcount_name,
+ reg);
else
- fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
- mcount_name, reg, reg);
+ fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
+ label, mcount_name, reg, reg);
break;
case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
@@ -23327,24 +23870,24 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
break;
case CM_SMALL_PIC:
case CM_MEDIUM_PIC:
- if (!ix86_direct_extern_access)
+ if (!flag_plt)
{
if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
- mcount_name);
+ fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
+ label, mcount_name);
else
- fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
- mcount_name);
+ fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
+ label, mcount_name);
break;
}
/* fall through */
default:
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
break;
}
}
else
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
}
else if (flag_pic)
{
@@ -23358,10 +23901,14 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
"\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
+ if (flag_plt)
+ x86_print_call_or_nop (file, mcount_name, label);
+ else if (ASSEMBLER_DIALECT == ASM_INTEL)
+ fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
+ label, mcount_name);
else
- fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
+ fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
+ label, mcount_name);
}
else
{
@@ -23374,12 +23921,10 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
}
- if (flag_record_mcount
- || lookup_attribute ("fentry_section",
- DECL_ATTRIBUTES (current_function_decl)))
+ if (fentry_section_p)
{
const char *sname = "__mcount_loc";
@@ -24138,7 +24683,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
- if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
&& VALID_MMX_REG_MODE (mode))
@@ -24386,8 +24931,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (saw_asm_flag)
return seq;
@@ -24675,7 +25219,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24746,7 +25290,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ if (fp)
+ return vec_fp_conversion_cost (ix86_tune_cost, mode);
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
@@ -24759,12 +25307,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* One vinserti128 for combining two SSE vectors for AVX256. */
else if (GET_MODE_BITSIZE (mode) == 256)
return ((n - 2) * ix86_cost->sse_op
- + ix86_vec_cost (mode, ix86_cost->addss));
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- return ((n - 4) * ix86_cost->sse_op
- + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ {
+ machine_mode half_mode
+ = mode_for_vector (GET_MODE_INNER (mode),
+ GET_MODE_NUNITS (mode) / 2).require ();
+ return ((n - 4) * ix86_cost->sse_op
+ + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
+ }
gcc_unreachable ();
}
@@ -24932,7 +25486,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
switch (mode)
{
case E_QImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V64QImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V32QImode;
@@ -24940,7 +25494,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V16QImode;
case E_HImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V32HImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16HImode;
@@ -24948,7 +25502,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8HImode;
case E_SImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SImode;
@@ -24956,7 +25510,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SImode;
case E_DImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DImode;
@@ -24970,16 +25524,15 @@ ix86_preferred_simd_mode (scalar_mode mode)
{
if (TARGET_PREFER_AVX128)
return V8HFmode;
- else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
+ else if (TARGET_PREFER_AVX256)
return V16HFmode;
}
- if (TARGET_EVEX512)
- return V32HFmode;
+ return V32HFmode;
}
return word_mode;
case E_BFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V32BFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16BFmode;
@@ -24987,7 +25540,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8BFmode;
case E_SFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
@@ -24995,7 +25548,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SFmode;
case E_DFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
@@ -25015,13 +25568,13 @@ ix86_preferred_simd_mode (scalar_mode mode)
static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
modes->safe_push (V64QImode);
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
}
- else if (TARGET_AVX512F && TARGET_EVEX512 && all)
+ else if (TARGET_AVX512F && all)
{
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
@@ -25059,7 +25612,7 @@ ix86_get_mask_mode (machine_mode data_mode)
unsigned elem_size = vector_size / nunits;
/* Scalar mask case. */
- if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ if ((TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
/* AVX512FP16 only supports vector comparison
to kmask for _Float16. */
@@ -25267,7 +25820,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (X87_FLOAT_MODE_P (mode))
stmt_cost = ix86_cost->fadd;
else
- stmt_cost = ix86_cost->add;
+ stmt_cost = ix86_cost->add;
}
else
stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
@@ -25322,7 +25875,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(subcode == RSHIFT_EXPR
&& !TYPE_UNSIGNED (TREE_TYPE (op1)))
? ASHIFTRT : LSHIFTRT, mode,
- TREE_CODE (op2) == INTEGER_CST,
+ TREE_CODE (op2) == INTEGER_CST,
cst_and_fits_in_hwi (op2)
? int_cst_value (op2) : -1,
false, false, NULL, NULL);
@@ -25331,27 +25884,174 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case NOP_EXPR:
/* Only sign-conversions are free. */
if (tree_nop_conversion_p
- (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ break;
+
+ case FLOAT_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtsi2ss;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ break;
+
+ case FIX_TRUNC_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtss2si;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ break;
+
+ case COND_EXPR:
+ {
+	      /* The SSE2 conditional move sequence is:
+ pcmpgtd %xmm5, %xmm0 (accounted separately)
+ pand %xmm0, %xmm2
+ pandn %xmm1, %xmm0
+ por %xmm2, %xmm0
+ while SSE4 uses cmp + blend
+ and AVX512 masked moves.
+
+ The condition is accounted separately since we usually have
+ p = a < b
+ c = p ? x : y
+		 and we will account the first statement as setcc.  The exception
+		 is when p is loaded from memory as bool and then we will not
+		 account the compare, but there is no way to check for this.  */
+
+ int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+	      /* If one of the parameters is 0 or -1 the sequence will be simplified:
+ (if_true & mask) | (if_false & ~mask) -> if_true & mask */
+ if (ninsns > 1
+ && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs3 (stmt_info->stmt))))
+ ninsns = 1;
+
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ninsns * ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+		/* x87 requires a conditional branch.  We don't have a cost for
+		   that.  */
+ ;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+ else
+ /* compare (accounted separately) + cmov. */
+ stmt_cost = ix86_cost->add;
+ }
break;
- case BIT_IOR_EXPR:
- case ABS_EXPR:
- case ABSU_EXPR:
case MIN_EXPR:
case MAX_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+		  /* x87 requires a conditional branch.  We don't have a cost for
+		     that.  */
+ ;
+ else
+ /* minss */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vpmin was introduced in SSE3.
+ SSE2 needs pcmpgtd + pand + pandn + pxor.
+		       If one of the parameters is 0 or -1 the sequence is simplified
+ to pcmpgtd + pand. */
+ if (!TARGET_SSSE3)
+ {
+ if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt)))
+ stmt_cost *= 2;
+ else
+ stmt_cost *= 4;
+ }
+ }
+ else
+ /* cmp + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* fabs. */
+ stmt_cost = ix86_cost->fabs;
+ else
+ /* andss of sign bit. */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+		    /* pabs was introduced in SSSE3.
+		       Without it we use psrad + pxor + psub.  */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 3;
+ }
+ else
+ /* neg + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case BIT_NOT_EXPR:
- if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
- stmt_cost = ix86_cost->sse_op;
- else if (VECTOR_MODE_P (mode))
+ gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+ && !X87_FLOAT_MODE_P (mode));
+ if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
else
stmt_cost = ix86_cost->add;
break;
+
default:
+ if (truth_value_p (subcode))
+ {
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+		/* CMPccS? instructions are cheap, so use sse_op.  While they
+		   produce a mask which may need to be turned into 0/1 by an and,
+		   expect that this will be optimized away in the common case.  */
+ stmt_cost = ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* fcmp + setcc. */
+ stmt_cost = ix86_cost->fadd + ix86_cost->add;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ else
+ /* setcc. */
+ stmt_cost = ix86_cost->add;
+ break;
+ }
break;
}
}
@@ -25375,6 +26075,37 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ if (kind == vec_promote_demote)
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ bool inner_fp = FLOAT_TYPE_P
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+ if (fp && inner_fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ else if (fp && !inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ else if (!fp && inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* VEC_PACK_TRUNC_EXPR and similar demote operations: if the inner size
+ is greater than the outer size, we will end up doing two conversions
+ and packing them. We always pack pairs; if the size difference is
+ larger, the operation is split into multiple demotes. */
+ if (inner_size > outer_size)
+ stmt_cost = stmt_cost * 2
+ + ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
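A tiny sketch (not from the patch) of the "two conversions plus a pack" shape the doubling above accounts for, using the pre-AVX double-to-float demote as the example:

  #include <immintrin.h>

  /* Demote four doubles (in two 128-bit halves) to four floats.  */
  static __m128
  demote_4_doubles (__m128d lo, __m128d hi)
  {
    __m128 l = _mm_cvtpd_ps (lo);   /* first conversion */
    __m128 h = _mm_cvtpd_ps (hi);   /* second conversion */
    return _mm_movelh_ps (l, h);    /* pack the two halves */
  }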
+
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
@@ -25445,7 +26176,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else
{
m_num_gpr_needed[where]++;
- stmt_cost += ix86_cost->sse_to_integer;
+
+ int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+
+ /* For integer construction, the number of actual GPR -> XMM
+ moves will be somewhere between 0 and n.
+ We do not have a very good idea of the actual number, since
+ the source may be a constant, memory, or a chain of
+ instructions that will later be converted by the
+ scalar-to-vector pass. */
+ if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 256)
+ cost *= 2;
+ else if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 512)
+ cost *= 3;
+ stmt_cost += cost;
}
}
}
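To make the heuristic concrete, a hypothetical vec_construct that this code would price (an editorial illustration, not part of the patch; compile with -mavx): building a 256-bit vector from scalars that may live in GPRs, where anywhere between zero and eight real GPR->XMM moves end up being emitted:

  #include <immintrin.h>

  __m256i
  build_v8si (int a, int b, int c, int d, int e, int f, int g, int h)
  {
    /* Element 0 is 'a'; how many elements need a GPR->XMM move depends
       on whether the inputs are constants, memory loads, or values
       later rewritten by the scalar-to-vector pass.  */
    return _mm256_set_epi32 (h, g, f, e, d, c, b, a);
  }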
@@ -25537,14 +26283,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
a AVX2 and a SSE epilogue for AVX512 vectorized loops. */
if (loop_vinfo
+ && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
- {
- if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
- m_suggested_epilogue_mode = V32QImode;
- else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
- m_suggested_epilogue_mode = V16QImode;
- }
+ m_suggested_epilogue_mode = V16QImode;
/* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
enable a 64bit SSE epilogue. */
if (loop_vinfo
@@ -25672,7 +26414,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
clonei->vecsize_mangle = 'e';
else if (TARGET_AVX2)
clonei->vecsize_mangle = 'd';
@@ -25764,17 +26506,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
return -1;
if (!TARGET_AVX)
return 0;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
+ return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
case 'c':
if (!TARGET_AVX)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
+ return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
case 'd':
if (!TARGET_AVX2)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
+ return TARGET_AVX512F ? 1 : 0;
case 'e':
- if (!TARGET_AVX512F || !TARGET_EVEX512)
+ if (!TARGET_AVX512F)
return -1;
return 0;
default:
@@ -27446,6 +28188,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
+sbitmap
+ix86_get_separate_components (void)
+{
+ HOST_WIDE_INT offset, to_allocate;
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+ struct machine_function *m = cfun->machine;
+
+ offset = m->frame.stack_pointer_offset;
+ to_allocate = offset - m->frame.sse_reg_save_offset;
+
+ /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
+ Experiments show that APX PPX can speed up the prologue. If the function
+ does not exit early during actual execution, then using APX PPX is faster.
+ If the function always exits early during actual execution, then shrink
+ wrap separate reduces the number of MOV (PUSH/POP) instructions actually
+ executed, thus speeding up execution.
+ foo:
+ movl $1, %eax
+ testq %rdi, %rdi
+ jne .L60
+ ret ---> early return.
+ .L60:
+ subq $88, %rsp ---> belongs to prologue.
+ xorl %eax, %eax
+ movq %rbx, 40(%rsp) ---> belongs to prologue.
+ movq 8(%rdi), %rbx
+ movq %rbp, 48(%rsp) ---> belongs to prologue.
+ movq %rdi, %rbp
+ testq %rbx, %rbx
+ jne .L61
+ movq 40(%rsp), %rbx
+ movq 48(%rsp), %rbp
+ addq $88, %rsp
+ ret
+ .L61:
+ movq %r12, 56(%rsp) ---> belongs to prologue.
+ movq %r13, 64(%rsp) ---> belongs to prologue.
+ movq %r14, 72(%rsp) ---> belongs to prologue.
+ ... ...
+
+ Disable shrink wrap separate when PPX is enabled. */
+ if ((TARGET_APX_PPX && !crtl->calls_eh_return)
+ || cfun->machine->func_type != TYPE_NORMAL
+ || TARGET_SEH
+ || crtl->stack_realign_needed
+ || m->call_ms2sysv)
+ return components;
+
+ /* Since separate shrink wrapping uses MOV instead of PUSH/POP,
+ disable it when MOV is prohibited. */
+ if (save_regs_using_push_pop (to_allocate))
+ return components;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ /* Skip registers with large offsets, where a pseudo may be needed. */
+ if (IN_RANGE (offset, -0x8000, 0x7fff))
+ bitmap_set_bit (components, regno);
+ offset += UNITS_PER_WORD;
+ }
+
+ /* Don't mess with the following registers. */
+ if (frame_pointer_needed)
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+ if (crtl->drap_reg)
+ bitmap_clear_bit (components, REGNO (crtl->drap_reg));
+
+ if (pic_offset_table_rtx)
+ bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
+
+ return components;
+}
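A minimal C sketch (an illustration of the comment above, not code from the patch) of the early-return shape that benefits: the calls on the slow path force values into callee-saved registers, so the register saves can be sunk past the fast exit:

  struct node { struct node *next; long payload; };
  extern long process (long);

  long
  walk (struct node *p)
  {
    if (p == 0)
      return 1;                     /* fast path: no saved register needed */
    long sum = 0;
    for (; p; p = p->next)
      sum += process (p->payload);  /* slow path: values live across calls */
    return sum;
  }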
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
+sbitmap
+ix86_components_for_bb (basic_block bb)
+{
+ bitmap in = DF_LIVE_IN (bb);
+ bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
+ bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
+
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
+ /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!fixed_regs[regno]
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+ || bitmap_bit_p (in, regno)
+ || bitmap_bit_p (gen, regno)
+ || bitmap_bit_p (kill, regno)))
+ bitmap_set_bit (components, regno);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
+void
+ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+ /* Nothing to do for x86. */
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
+void
+ix86_emit_prologue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
+void
+ix86_emit_epilogue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ {
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
+
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ }
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
+void
+ix86_set_handled_components (sbitmap components)
+{
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (bitmap_bit_p (components, regno))
+ {
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+ cfun->machine->use_fast_prologue_epilogue = true;
+ cfun->machine->frame.save_regs_using_mov = true;
+ }
+}
+
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+ ix86_emit_prologue_components
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+ ix86_emit_epilogue_components
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..3f7ad68 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -179,6 +179,7 @@ struct processor_costs {
const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
zmm_move;
const int sse_to_integer; /* cost of moving SSE register to integer. */
+ const int integer_to_sse; /* cost of moving integer register to SSE. */
const int gather_static, gather_per_elt; /* Cost of gather load is computed
as static + per_item * nelts. */
const int scatter_static, scatter_per_elt; /* Cost of gather store is
@@ -207,6 +208,16 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+ const int cvtss2sd; /* cost of SSE FP conversions,
+ such as CVTSS2SD. */
+ const int vcvtps2pd256; /* cost of 256-bit packed FP conversions,
+ such as VCVTPD2PS with the larger reg in ymm. */
+ const int vcvtps2pd512; /* cost of 512-bit packed FP conversions,
+ such as VCVTPD2PS with the larger reg in zmm. */
+ const int cvtsi2ss; /* cost of CVTSI2SS instruction. */
+ const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */
+ const int cvtpi2ps; /* cost of CVTPI2PS instruction. */
+ const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
@@ -479,7 +490,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_SSE_MOVCC_USE_BLENDV \
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
#define TARGET_ALIGN_TIGHT_LOOPS \
- ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+ ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+#define TARGET_SSE_REDUCTION_PREFER_PSHUF \
+ ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF]
/* Feature tests against the various architecture variations. */
@@ -525,6 +538,7 @@ extern unsigned char ix86_prefetch_sse;
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS 0
+#define TARGET_WIN32_TLS 0
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
@@ -804,7 +818,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */
#define BIGGEST_ALIGNMENT \
- (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \
+ (TARGET_IAMCU ? 32 : (TARGET_AVX512F \
? 512 : (TARGET_AVX ? 256 : 128)))
/* Maximum stack alignment. */
@@ -1682,6 +1696,8 @@ typedef struct ix86_args {
int stdarg; /* Set to 1 if function is stdarg. */
enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
MS_ABI for ms abi. */
+ bool preserve_none_abi; /* Set to true if the preserve_none ABI is
+ used. */
tree decl; /* Callee decl. */
} CUMULATIVE_ARGS;
@@ -1883,7 +1899,7 @@ typedef struct ix86_args {
MOVE_MAX_PIECES defaults to MOVE_MAX. */
#define MOVE_MAX \
- ((TARGET_AVX512F && TARGET_EVEX512\
+ ((TARGET_AVX512F \
&& (ix86_move_max == PVW_AVX512 \
|| ix86_store_max == PVW_AVX512)) \
? 64 \
@@ -1902,7 +1918,7 @@ typedef struct ix86_args {
store_by_pieces of 16/32/64 bytes. */
#define STORE_MAX_PIECES \
(TARGET_INTER_UNIT_MOVES_TO_VEC \
- ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \
+ ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \
? 64 \
: ((TARGET_AVX \
&& ix86_store_max >= PVW_AVX256) \
@@ -2255,6 +2271,13 @@ extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
} while (0)
#endif
+/* In Intel syntax, we have to quote user-defined labels that would
+ match (unprefixed) registers or operators. */
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ ix86_asm_output_labelref ((STREAM), user_label_prefix, (NAME))
+
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. */
@@ -2396,13 +2419,13 @@ constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES
| PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_CLWB | PTA_EVEX512;
+ | PTA_CLWB;
constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512
| PTA_AVX512VNNI;
constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16;
constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_EVEX512;
+ | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
| PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
| PTA_RDPID | PTA_AVX512VPOPCNTDQ;
@@ -2425,14 +2448,16 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
| PTA_SGX | PTA_PTWRITE;
constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
| PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+ | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
| PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
- | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+ | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+ | PTA_ENQCMD | PTA_UINTR;
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
- | PTA_PREFETCHI;
+ | PTA_PREFETCHI | PTA_AVX10_1;
constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
| PTA_AMX_COMPLEX;
constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST;
@@ -2444,16 +2469,11 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST
| PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR
| PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI;
-constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA
- | PTA_GFNI | PTA_VAES | PTA_VPCLMULQDQ | PTA_RDPID | PTA_PCONFIG
- | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
- | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
- | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
- | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
- | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
- | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
- | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
- | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D
+ | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
+ | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
+ | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
+ | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
@@ -2480,7 +2500,7 @@ constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
| PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL
| PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
- | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512;
+ | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI
| PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI;
@@ -2782,11 +2802,13 @@ enum call_saved_registers_type
or "no_caller_saved_registers" attribute. */
TYPE_NO_CALLER_SAVED_REGISTERS,
/* The current function is a function specified with the
- "no_callee_saved_registers" attribute. */
+ "no_callee_saved_registers" attribute or a function specified with
+ the "noreturn" attribute when compiled with
+ "-mnoreturn-no-callee-saved-registers". */
TYPE_NO_CALLEE_SAVED_REGISTERS,
- /* The current function is a function specified with the "noreturn"
- attribute. */
- TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP,
+ /* The current function is a function specified with the
+ "preserve_none" attribute. */
+ TYPE_PRESERVE_NONE,
};
enum queued_insn_type
@@ -2805,6 +2827,10 @@ struct GTY(()) machine_function {
/* Cached initial frame layout for the current function. */
struct ix86_frame frame;
+ /* The components already handled by separate shrink-wrapping, which should
+ not be considered by the prologue and epilogue. */
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
+
/* For -fsplit-stack support: A stack local which holds a pointer to
the stack arguments for a function with a variable number of
arguments. This is set at the start of the function and is used
@@ -2859,7 +2885,7 @@ struct GTY(()) machine_function {
ENUM_BITFIELD(indirect_branch) function_return_type : 3;
/* Call saved registers type. */
- ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 2;
+ ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 3;
/* If true, there is register available for argument passing. This
is used only in ix86_function_ok_for_sibcall by 32-bit to determine
@@ -2904,6 +2930,9 @@ struct GTY(()) machine_function {
/* True if inline asm with redzone clobber has been seen. */
BOOL_BITFIELD asm_redzone_clobber_seen : 1;
+ /* True if this is a recursive function. */
+ BOOL_BITFIELD recursive_function : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f29..21b9f5c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -58,10 +58,11 @@
;; H -- print a memory address offset by 8; used for sse high-parts
;; K -- print HLE lock prefix
;; Y -- print condition for XOP pcom* instruction.
+;; v -- print segment override prefix
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
-;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+;; ^ -- print addr32 prefix if Pmode != word_mode
;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
(define_c_enum "unspec" [
@@ -79,6 +80,7 @@
UNSPEC_MACHOPIC_OFFSET
UNSPEC_PCREL
UNSPEC_SIZEOF
+ UNSPEC_SECREL32
;; Prologue support
UNSPEC_STACK_ALLOC
@@ -579,12 +581,11 @@
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
- avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
- noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
- noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
- avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
- avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl,noapx_nf,avx10_2"
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
+ avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
+ avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl,
+ avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -954,12 +955,8 @@
(eq_attr "isa" "fma_or_avx512vl")
(symbol_ref "TARGET_FMA || TARGET_AVX512VL")
(eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
- (eq_attr "isa" "avx512f_512")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
- (eq_attr "isa" "avx512bw_512")
- (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
(eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
(eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
@@ -1495,7 +1492,7 @@
[(reg:CC FLAGS_REG) (const_int 0)])
(label_ref (match_operand 3))
(pc)))]
- "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
+ "TARGET_AVX512F && !TARGET_PREFER_AVX256"
{
ix86_expand_branch (GET_CODE (operands[0]),
operands[1], operands[2], operands[3]);
@@ -1602,6 +1599,20 @@
[(set_attr "type" "icmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*cmp<mode>_plus_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m")
+ (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+{
+ operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode);
+ return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*cmpqi_ext<mode>_1"
[(set (reg FLAGS_REG)
(compare
@@ -2374,7 +2385,7 @@
(define_expand "movxi"
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_expand_vector_move (XImode, operands); DONE;")
(define_expand "movoi"
@@ -2427,22 +2438,32 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "0")])
-(define_insn "*mov<mode>_and"
+;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem"
+;; otherwise.
+(define_insn_and_split "*mov<mode>_and"
[(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"and{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
-(define_insn "*mov<mode>_or"
+;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem"
+;; otherwise.
+(define_insn_and_split "*mov<mode>_or"
[(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
(match_operand:SWI248 1 "constm1_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"or{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
@@ -2450,7 +2471,7 @@
(define_insn "*movxi_internal_avx512f"
[(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
(match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
{
@@ -2947,6 +2968,7 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
+ && operands[1] != constm1_rtx
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
@@ -4414,7 +4436,7 @@
(eq_attr "alternative" "11")
(const_string "DI")
(eq_attr "alternative" "5")
- (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
+ (cond [(and (match_test "TARGET_AVX512F")
(not (match_test "TARGET_PREFER_AVX256")))
(const_string "V16SF")
(match_test "TARGET_AVX")
@@ -5482,7 +5504,7 @@
(set_attr "memory" "none")
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(const_string "*")))])
@@ -5704,7 +5726,7 @@
/* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN quietly,
and it always round to even.
- flag_unsafte_math_optimization is needed for psrld.
+ flag_unsafe_math_optimization is needed for psrld.
If we don't expect qNaNs nor sNaNs and can assume rounding
to nearest, we can expand the conversion inline as
(fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */
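A scalar C sketch of that inline expansion (assuming the input is not a NaN and round-to-nearest-even semantics are acceptable), for checking the bit manipulation by hand:

  #include <stdint.h>
  #include <string.h>

  static uint16_t
  float_to_bf16_rne (float f)
  {
    uint32_t fromi;
    memcpy (&fromi, &f, sizeof fromi);   /* bit-cast the float */
    return (uint16_t) ((fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16);
  }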
@@ -8708,6 +8730,34 @@
(set (match_dup 1)
(minus:SWI (match_dup 1) (match_dup 0)))])])
+;; Under APX NDD, 'sub reg, mem, reg' is valid.
+;; Transform
+;; mov reg0, mem1
+;; sub reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sub mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SWI 2 "memory_operand")
+ (match_dup 0)))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 2) (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 2) (match_dup 0)))
+ (set (match_dup 2)
+ (minus:SWI (match_dup 2) (match_dup 0)))])])
+
;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
;; subl $1, %eax; jnc .Lxx;
(define_peephole2
@@ -9155,6 +9205,118 @@
(match_dup 1))
(match_dup 0)))])])
+;; Under APX NDD, 'adc reg, mem, reg' is valid.
+;;
+;; Transform
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem1, reg0
+;; into
+;; mov reg0, mem2
+;; adc mem1, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 1) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])])
+
+;; Transform
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem2, reg0
+;; into
+;; mov reg0, mem1
+;; adc mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])])
+
(define_peephole2
[(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
@@ -9635,6 +9797,52 @@
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])])
+;; Under APX NDD, 'sbb reg, mem, reg' is valid.
+;;
+;; Transform
+;; mov reg0, mem1
+;; sbb reg0, mem2, reg0
+;; mov mem2, reg0
+;; into
+;; mov reg0, mem1
+;; sbb mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand"))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_dup 0)))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 2)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 2))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (minus:SWI48 (minus:SWI48 (match_dup 2)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])])
+
(define_peephole2
[(set (match_operand:SWI48 6 "general_reg_operand")
(match_operand:SWI48 7 "memory_operand"))
@@ -21315,11 +21523,12 @@
(set_attr "mode" "SI")])
; As bsr is undefined behavior on zero and for other input
-; values it is in range 0 to 63, we can optimize away sign-extends.
-(define_insn_and_split "*bsr_rex64_2"
+; values it is in range 0 to 63, we can optimize away sign-extends
+; or zero-extends.
+(define_insn_and_split "*bsr_rex64<u>_2"
[(set (match_operand:DI 0 "register_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI
(const_int 63)
(subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21341,9 +21550,9 @@
operands[3] = lowpart_subreg (SImode, operands[2], DImode);
})
-(define_insn_and_split "*bsr_2"
+(define_insn_and_split "*bsr<u>_2"
[(set (match_operand:DI 0 "register_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI
(const_int 31)
@@ -21420,7 +21629,7 @@
(minus:DI
(match_operand:DI 2 "const_int_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI (const_int 63)
(subreg:SI
(clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21450,7 +21659,7 @@
[(set (match_operand:DI 0 "register_operand")
(minus:DI
(match_operand:DI 2 "const_int_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI (const_int 31)
(clz:SI (match_operand:SI 1 "nonimmediate_operand")))
@@ -22992,7 +23201,8 @@
(match_operand 3)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 4 "register_operand" "=D"))]
"TARGET_64BIT"
{
if (!TARGET_X32)
@@ -23009,7 +23219,7 @@
Use data16 prefix instead, which doesn't have this problem. */
fputs ("\tdata16", asm_out_file);
output_asm_insn
- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands);
if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
fputs (ASM_SHORT "0x6666\n", asm_out_file);
else
@@ -23033,14 +23243,15 @@
(match_operand 4)))
(unspec:DI [(match_operand 1 "tls_symbolic_operand")
(reg:DI SP_REG)]
- UNSPEC_TLS_GD)]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:DI 5 "register_operand" "=D"))]
"TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
&& GET_CODE (operands[3]) == CONST
&& GET_CODE (XEXP (operands[3], 0)) == UNSPEC
&& XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
{
output_asm_insn
- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands);
output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
return "call\t{*%%rax|rax}";
@@ -23056,7 +23267,8 @@
(const_int 0)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)])]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
"ix86_tls_descriptor_calls_expanded_in_cfun = true;")
@@ -23107,11 +23319,12 @@
(call:P
(mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
(match_operand 2)))
- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
+ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:P 3 "register_operand" "=D"))]
"TARGET_64BIT"
{
output_asm_insn
- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+ ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands);
if (TARGET_SUN_TLS)
return "call\t%p1@plt";
if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
@@ -23127,14 +23340,15 @@
(mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
(match_operand:DI 2 "immediate_operand" "i")))
(match_operand 3)))
- (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
+ (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:DI 4 "register_operand" "=D"))]
"TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
&& GET_CODE (operands[2]) == CONST
&& GET_CODE (XEXP (operands[2], 0)) == UNSPEC
&& XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
{
output_asm_insn
- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+ ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands);
output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
return "call\t{*%%rax|rax}";
@@ -23148,7 +23362,8 @@
(call:P
(mem:QI (match_operand 1))
(const_int 0)))
- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
+ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:P 2 "register_operand"))])]
"TARGET_64BIT"
"ix86_tls_descriptor_calls_expanded_in_cfun = true;")
@@ -25587,10 +25802,6 @@
(clobber (reg:CC FLAGS_REG))])]
""
{
- /* Can't use this for non-default address spaces. */
- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
- FAIL;
-
int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
/* If .md ever supports :P for Pmode, these can be directly
@@ -25598,9 +25809,14 @@
operands[5] = plus_constant (Pmode, operands[0], piece_size);
operands[6] = plus_constant (Pmode, operands[2], piece_size);
- /* Can't use this if the user has appropriated esi or edi. */
+ /* Can't use this if the user has appropriated esi or edi,
+ or if we have the destination in the non-default address space,
+ since string insns cannot override the destination segment. */
if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
- && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1]))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))
+ || Pmode == word_mode))
{
emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3],
@@ -25635,8 +25851,15 @@
(const_int 8)))]
"TARGET_64BIT
&& !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsq"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsq";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "DI")])
@@ -25651,8 +25874,15 @@
(plus:P (match_dup 3)
(const_int 4)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movs{l|d}"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "SI")])
@@ -25667,8 +25897,15 @@
(plus:P (match_dup 3)
(const_int 2)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsw"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsw";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "HI")])
@@ -25683,8 +25920,15 @@
(plus:P (match_dup 3)
(const_int 1)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsb"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsb";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set (attr "prefix_rex")
@@ -25723,8 +25967,15 @@
(use (match_dup 5))]
"TARGET_64BIT
&& !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsq"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsq";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25743,8 +25994,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movs{l|d}"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25761,8 +26019,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsb"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsb";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25844,7 +26109,8 @@
(unspec [(const_int 0)] UNSPEC_STOS)]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosq"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25858,7 +26124,8 @@
(const_int 4)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stos{l|d}"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25872,7 +26139,8 @@
(const_int 2)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25886,7 +26154,8 @@
(const_int 1)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25922,7 +26191,8 @@
(use (match_dup 4))]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosq"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25940,7 +26210,8 @@
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stos{l|d}"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25957,7 +26228,8 @@
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -26224,8 +26496,8 @@
(define_expand "mov<mode>cc"
[(set (match_operand:SWIM 0 "register_operand")
(if_then_else:SWIM (match_operand 1 "comparison_operator")
- (match_operand:SWIM 2 "<general_operand>")
- (match_operand:SWIM 3 "<general_operand>")))]
+ (match_operand:SWIM 2 "general_operand")
+ (match_operand:SWIM 3 "general_operand")))]
""
"if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
@@ -26592,8 +26864,8 @@
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
(match_operand 1 "comparison_operator")
- (match_operand:X87MODEF 2 "register_operand")
- (match_operand:X87MODEF 3 "register_operand")))]
+ (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand")
+ (match_operand:X87MODEF 3 "nonimm_or_0_operand")))]
"(TARGET_80387 && TARGET_CMOVE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
@@ -27183,7 +27455,7 @@
(cond [(and (eq_attr "alternative" "0")
(not (match_test "TARGET_OPT_AGU")))
(const_string "alu")
- (match_operand:<MODE> 2 "const0_operand")
+ (match_operand 2 "const0_operand")
(const_string "imov")
]
(const_string "lea")))
@@ -27197,6 +27469,46 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (plus:P (match_operand:P 1 "register_operand" "r")
+ (match_operand:P 2 "<nonmemory_operand>" "l<i>")))
+ (clobber (mem:BLK (scratch)))]
+ ""
+{
+ if (get_attr_type (insn) == TYPE_IMOV)
+ return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else
+ {
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(match_operand 2 "const0_operand")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set (attr "length_immediate")
+ (cond [(eq_attr "type" "imov")
+ (const_string "0")
+ ]
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:P 0 "register_operand")
+ (plus:P (match_dup 0)
+ (match_operand:P 1 "<nonmemory_operand>")))
+ (clobber (mem:BLK (scratch)))])]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel
+ [(set (match_dup 0)
+ (plus:P (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])])
+
(define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
(minus:P (match_operand:P 1 "register_operand" "0")
@@ -28144,6 +28456,41 @@
const0_rtx);
})
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like the cmpelim-optimized pattern.
+;; Eliminate the extra mov instruction, turning
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; into
+;; decl (%rdi)
+(define_peephole2
+ [(parallel [(set (reg FLAGS_REG)
+ (compare (match_operator:SWI 2 "plusminuslogic_operator"
+ [(match_operand:SWI 0 "memory_operand")
+ (match_operand:SWI 1 "<nonmemory_operand>")])
+ (const_int 0)))
+ (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+ (set (match_dup 0) (match_dup 3))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (2, operands[3])
+ && !reg_overlap_mentioned_p (operands[3], operands[0])
+ && ix86_match_ccmode (peep2_next_insn (0),
+ (GET_CODE (operands[2]) == PLUS
+ || GET_CODE (operands[2]) == MINUS)
+ ? CCGOCmode : CCNOmode)"
+ [(parallel [(set (match_dup 4) (match_dup 6))
+ (set (match_dup 0) (match_dup 5))])]
+{
+ operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+ operands[5]
+ = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ copy_rtx (operands[0]), operands[1]);
+ operands[6]
+ = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+ const0_rtx);
+})
+
;; Likewise for instances where we have a lea pattern.
(define_peephole2
[(set (match_operand:SWI 0 "register_operand")
@@ -28237,6 +28584,54 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Eliminate two mov instructions and one cmp.
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movb %rdx, (%rdi)
+;; cmpb %rsi, %rax
+;; jne
+;; to
+;; xorb %rsi, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI 0 "register_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_operand:SWI 4 "register_operand")
+ (xor:SWI (match_operand:SWI 3 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_dup 4))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI 5 "register_operand")
+ (match_operand:SWI 6 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && (rtx_equal_p (operands[0], operands[5])
+ ? rtx_equal_p (operands[2], operands[6])
+ : rtx_equal_p (operands[2], operands[5])
+ && rtx_equal_p (operands[0], operands[6]))
+ && peep2_reg_dead_p (3, operands[4])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], QImode)
+ || any_QIreg_operand (operands[2], QImode))"
+ [(parallel [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 1) (match_dup 8))])]
+{
+ operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ operands[2]);
+ operands[9]
+ = gen_rtx_COMPARE (GET_MODE (operands[7]),
+ copy_rtx (operands[8]),
+ const0_rtx);
+})
+
(define_peephole2
[(set (match_operand:SWI12 0 "register_operand")
(match_operand:SWI12 1 "memory_operand"))
@@ -28480,6 +28875,58 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Eliminate two mov instructions and one cmp.
+;; from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;; xorl %sil, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI12 0 "register_operand")
+ (match_operand:SWI12 1 "memory_operand"))
+ (parallel [(set (match_operand:SI 4 "register_operand")
+ (xor:SI (match_operand:SI 3 "register_operand")
+ (match_operand:SI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI12 6 "register_operand")
+ (match_operand:SWI12 7 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && REGNO (operands[5]) == REGNO (operands[4])
+ && (rtx_equal_p (operands[0], operands[6])
+ ? (REG_P (operands[2])
+ ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+ : rtx_equal_p (operands[2], operands[7]))
+ : (rtx_equal_p (operands[0], operands[7])
+ && REG_P (operands[2])
+ && REGNO (operands[2]) == REGNO (operands[6])))
+ && peep2_reg_dead_p (3, operands[5])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], SImode)
+ || any_QIreg_operand (operands[2], SImode))"
+ [(parallel [(set (match_dup 8) (match_dup 10))
+ (set (match_dup 1) (match_dup 9))])]
+{
+ operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ gen_lowpart (<MODE>mode, operands[2]));
+ operands[10]
+ = gen_rtx_COMPARE (GET_MODE (operands[8]),
+ copy_rtx (operands[9]),
+ const0_rtx);
+})
+
;; Attempt to optimize away memory stores of values the memory already
;; has. See PR79593.
(define_peephole2
@@ -29082,6 +29529,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_expand "crc_rev<SWI124:mode>si4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SWI124 2 "nonimmediate_operand")
+ (match_operand:SI 3)]
+ "TARGET_CRC32"
+{
+ /* crc32 uses the iSCSI polynomial. */
+ if (INTVAL (operands[3]) == 0x1EDC6F41)
+ emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2]));
+ else
+ expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
+ operands[3], <SWI124:MODE>mode,
+ generate_reflecting_code_standard);
+ DONE;
+})
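For context, a usage sketch (not part of the patch; compile with -msse4.2) of the instruction this expander selects when the requested polynomial is the bit-reflected iSCSI/Castagnoli one:

  #include <stddef.h>
  #include <stdint.h>
  #include <nmmintrin.h>

  /* Byte-at-a-time CRC32C update using the crc32 instruction.  */
  static uint32_t
  crc32c_update (uint32_t crc, const uint8_t *buf, size_t len)
  {
    for (size_t i = 0; i < len; i++)
      crc = _mm_crc32_u8 (crc, buf[i]);
    return crc;
  }

Any other polynomial falls back to the table-based expansion generated by expand_reversed_crc_table_based.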
+
(define_insn "rdpmc"
[(set (match_operand:DI 0 "register_operand" "=A")
(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 27d34bd..c93c0b1 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -36,13 +36,6 @@ HOST_WIDE_INT ix86_isa_flags_explicit
Variable
HOST_WIDE_INT ix86_isa_flags2_explicit
-; Indicate if AVX512 and AVX10.1 are explicitly set no.
-Variable
-int ix86_no_avx512_explicit = 0
-
-Variable
-int ix86_no_avx10_1_explicit = 0
-
; Additional target flags
Variable
int ix86_target_flags
@@ -103,14 +96,6 @@ HOST_WIDE_INT x_ix86_isa_flags2_explicit
TargetSave
HOST_WIDE_INT x_ix86_isa_flags_explicit
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx512_explicit
-
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx10_1_explicit
-
;; whether -mtune was not specified
TargetSave
unsigned char tune_defaulted
@@ -721,13 +706,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
msse4
-Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
+Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
-mno-sse4
-Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save
-Do not support SSE4.1 and SSE4.2 built-in functions and code generation.
-
msse5
Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed)
;; Deprecated
@@ -1355,38 +1336,24 @@ mapx-inline-asm-use-gpr32
Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
Enable GPR32 in inline asm when APX_F enabled.
-mevex512
-Target Mask(ISA2_EVEX512) Var(ix86_isa_flags2) Save Warn(%<-mevex512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support 512 bit vector built-in functions and code generation.
-
musermsr
Target Mask(ISA2_USER_MSR) Var(ix86_isa_flags2) Save
Support USER_MSR built-in functions and code generation.
-mavx10.1-256
-Target Mask(ISA2_AVX10_1_256) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-256 built-in functions and code generation.
-
mavx10.1
-Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
-
-mavx10.1-512
-Target Alias(mavx10.1)
+Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
+and AVX10.1 built-in functions and code generation.
mavx10.2
Target Mask(ISA2_AVX10_2) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-AVX10.1-512 and AVX10.2 built-in functions and code generation.
+AVX10.1 and AVX10.2 built-in functions and code generation.
mamx-avx512
Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512,
-AVX10.2 and AMX-AVX512 built-in functions and code generation.
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+AVX10.1, AVX10.2 and AMX-AVX512 built-in functions and code generation.
mamx-tf32
Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 0d5a5a1..cce524c 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -590,21 +590,12 @@ UrlSuffix(gcc/x86-Options.html#index-mapxf)
mapx-inline-asm-use-gpr32
UrlSuffix(gcc/x86-Options.html#index-mapx-inline-asm-use-gpr32)
-mevex512
-UrlSuffix(gcc/x86-Options.html#index-mevex512)
-
musermsr
UrlSuffix(gcc/x86-Options.html#index-musermsr)
-mavx10.1-256
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256)
-
mavx10.1
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1)
-mavx10.1-512
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512)
-
mavx10.2
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2)
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index c30a4e0..b195fe5 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -148,24 +148,14 @@
#include <avx10_2mediaintrin.h>
-#include <avx10_2-512mediaintrin.h>
-
#include <avx10_2convertintrin.h>
-#include <avx10_2-512convertintrin.h>
-
#include <avx10_2bf16intrin.h>
-#include <avx10_2-512bf16intrin.h>
-
#include <avx10_2satcvtintrin.h>
-#include <avx10_2-512satcvtintrin.h>
-
#include <avx10_2minmaxintrin.h>
-#include <avx10_2-512minmaxintrin.h>
-
#include <avx10_2copyintrin.h>
#include <movrsintrin.h>
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3d3848c..1bd63b2 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -218,6 +218,7 @@
case UNSPEC_DTPOFF:
case UNSPEC_GOTNTPOFF:
case UNSPEC_NTPOFF:
+ case UNSPEC_SECREL32:
return true;
default:
break;
@@ -392,6 +393,23 @@
return false;
})
+;; Return true if VALUE is a constant integer whose negation satisfies
+;; x86_64_immediate_operand.
+(define_predicate "x86_64_neg_const_int_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT val = -UINTVAL (op);
+ if (mode == DImode && trunc_int_for_mode (val, SImode) != val)
+ return false;
+ if (flag_cf_protection & CF_BRANCH)
+ {
+ unsigned HOST_WIDE_INT endbr = TARGET_64BIT ? 0xfa1e0ff3 : 0xfb1e0ff3;
+ if ((val & HOST_WIDE_INT_C (0xffffffff)) == endbr)
+ return false;
+ }
+ return true;
+})
+
;; Return true if VALUE is a constant integer whose low and high words satisfy
;; x86_64_immediate_operand.
(define_predicate "x86_64_hilo_int_operand"
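The new x86_64_neg_const_int_operand predicate above accepts a CONST_INT whose negation still fits a sign-extended 32-bit immediate and, when -fcf-protection=branch is active, rejects values whose negated low 32 bits would spell an ENDBR opcode (0xfa1e0ff3 for 64-bit, 0xfb1e0ff3 for 32-bit). A plain-C sketch of the same check for the 64-bit case (the helper name and standalone types are illustrative, not GCC internals):

    #include <stdbool.h>
    #include <stdint.h>

    /* Would -val still be a legal sign-extended 32-bit immediate,
       and safe under CET branch protection?  */
    static bool
    neg_const_ok (int64_t val, bool cf_branch)
    {
      int64_t neg = (int64_t) -(uint64_t) val;  /* negate without overflow UB */
      if ((int32_t) neg != neg)                 /* does not sign-extend from 32 bits */
        return false;
      if (cf_branch && (uint32_t) neg == 0xfa1e0ff3u)  /* ENDBR64 byte pattern */
        return false;
      return true;
    }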
@@ -1267,12 +1285,19 @@
(match_operand 0 "vector_memory_operand")
(match_code "const_vector")))
+; Return true when OP is register_operand, vector_memory_operand,
+; const_vector zero or const_vector all ones.
+(define_predicate "vector_or_0_or_1s_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "vector_memory_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
(define_predicate "bcst_mem_operand"
(and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
(ior (match_test "TARGET_AVX512VL")
- (and (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")
- (match_test "TARGET_EVEX512"))))
+ (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
(match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
(match_test "GET_MODE (XEXP (op, 0))
== GET_MODE_INNER (GET_MODE (op))")
@@ -1333,6 +1358,12 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
+; Return true when OP is a nonimmediate or zero or all ones.
+(define_predicate "nonimm_or_0_or_1s_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b280676..252ba07 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,63 +279,63 @@
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator VMOVE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX") V1TI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All AVX-512{F,VL} vector modes without HF. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator V48_256_512_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL")])
;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48H_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
(define_mode_iterator VI12_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI12HFBF_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
- (V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All vector modes
(define_mode_iterator V
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; All 128bit vector modes
(define_mode_iterator V_128
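In this and the following iterator hunks the "&& TARGET_EVEX512" qualifiers drop out, so 512-bit entries such as V8DF or V64QI are gated only by their base ISA feature. A minimal sketch of what that means for users, not part of this patch (file name illustrative; both intrinsics are standard AVX-512F):

    /* copy8.c -- compile with: gcc -O2 -mavx512f -c copy8.c
       The V8DF entries in VMOVE/V above are enabled by TARGET_AVX512F
       alone; no extra -mevex512 option is needed for zmm moves.  */
    #include <immintrin.h>

    void
    copy8 (double *dst, const double *src)
    {
      _mm512_storeu_pd (dst, _mm512_loadu_pd (src));
    }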
@@ -352,54 +352,44 @@
;; All 512bit vector modes
(define_mode_iterator V_512
- [(V64QI "TARGET_EVEX512") (V32HI "TARGET_EVEX512")
- (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V64QI V32HI V16SI V8DI
+ V16SF V8DF V32HF V32BF])
;; All 256bit and 512bit vector modes
(define_mode_iterator V_256_512
[V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
- (V32BF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512F") (V32BF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; All vector float modes
(define_mode_iterator VF
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF1_VF2_AVX512DQ
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512DQ")
(V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
(V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
-(define_mode_iterator VF1_VF2_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
-
(define_mode_iterator VFH
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF_BHSD
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F")
(V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V32BF "TARGET_AVX10_2")
(V16BF "TARGET_AVX10_2")
@@ -408,12 +398,12 @@
;; 128-, 256- and 512-bit float vector modes for bitwise operations
(define_mode_iterator VFB
- [(V32BF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX512F")
(V16BF "TARGET_AVX") (V8BF "TARGET_SSE2")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
+ (V32HF "TARGET_AVX512F")
(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; 128- and 256-bit float vector modes
@@ -430,44 +420,39 @@
;; All SFmode vector float modes
(define_mode_iterator VF1
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_AVX2
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
;; 128- and 256-bit SF vector modes
(define_mode_iterator VF1_128_256
[(V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_128_256VL
- [(V8SF "TARGET_EVEX512") (V4SF "TARGET_AVX512VL")])
+ [V8SF (V4SF "TARGET_AVX512VL")])
;; All DFmode vector float modes
(define_mode_iterator VF2
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
-
-(define_mode_iterator VF2_AVX10_2
- [(V8DF "TARGET_AVX10_2") V4DF V2DF])
+ [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All DFmode & HFmode & BFmode vector float modes
(define_mode_iterator VF2HB
- [(V32BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V8BF "TARGET_AVX10_2")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2")
+ (V8BF "TARGET_AVX10_2") (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; 128- and 256-bit DF vector modes
(define_mode_iterator VF2_128_256
[(V4DF "TARGET_AVX") V2DF])
(define_mode_iterator VF2_512_256
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") V4DF])
+ [(V8DF "TARGET_AVX512F") V4DF])
(define_mode_iterator VF2_512_256VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL")])
;; All 128bit vector SF/DF modes
(define_mode_iterator VF_128
@@ -484,116 +469,102 @@
;; All 512bit vector float modes
(define_mode_iterator VF_512
- [(V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SF V8DF])
;; All 512bit vector float modes for bitwise operations
(define_mode_iterator VFB_512
- [(V32BF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512")
- (V8DF "TARGET_EVEX512")])
+ [V32BF V32HF V16SF V8DF])
(define_mode_iterator V24F_128
[V4SF V8HF V8BF])
(define_mode_iterator VI48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI1248_AVX512VLBW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW")
(V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
(V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
-
-(define_mode_iterator VI1248_AVX10_2
- [(V64QI "TARGET_AVX10_2") V32QI V16QI
- (V32HI "TARGET_AVX10_2") V16HI V8HI
- (V16SI "TARGET_AVX10_2") V8SI V4SI
- (V8DI "TARGET_AVX10_2") V4DI V2DI])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VFH_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator V48_AVX512VL_4
+ [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL_4
+ [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
-(define_mode_iterator VFH_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF
- (V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
+(define_mode_iterator V8_AVX512VL_2
+ [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF2_AVX512VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512BW
- [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
-
-(define_mode_iterator VF1_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF])
+ [(V16SF "TARGET_AVX512BW") (V8SF "TARGET_AVX2") V4SF])
(define_mode_iterator VHFBF
- [(V32HF "TARGET_EVEX512") V16HF V8HF
- (V32BF "TARGET_EVEX512") V16BF V8BF])
+ [V32HF V16HF V8HF V32BF V16BF V8BF])
(define_mode_iterator VHFBF_256 [V16HF V16BF])
(define_mode_iterator VHFBF_128 [V8HF V8BF])
(define_mode_iterator VHF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
(define_mode_iterator VHFBF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-
-(define_mode_iterator VHF_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-(define_mode_iterator VBF_AVX10_2
- [(V32BF "TARGET_AVX10_2") V16BF V8BF])
+(define_mode_iterator VBF
+ [V32BF V16BF V8BF])
;; All vector integer modes
(define_mode_iterator VI
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
;; All vector integer and HF modes
(define_mode_iterator VIHFBF
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF])
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF])
(define_mode_iterator VI_AVX2
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; All QImode vector integer modes
(define_mode_iterator VI1
@@ -611,56 +582,50 @@
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
(define_mode_iterator VI8
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
-
-(define_mode_iterator VI8_AVX10_2
- [(V8DI "TARGET_AVX10_2") V4DI V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI8_FVL
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")])
+ [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_AVX512VL
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_256_512
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX2
[(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
(define_mode_iterator VI1_AVX512VNNI
- [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512VNNIBW
- [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI12_256_512_AVX512VL
- [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
+ [V64QI (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL")])
(define_mode_iterator VI2_AVX2
[(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX2_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512F
- [(V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512VNNIBW
- [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V16HI "TARGET_AVX2") V8HI])
-(define_mode_iterator VI2_AVX10_2
- [(V32HI "TARGET_AVX10_2") V16HI V8HI])
-
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -668,65 +633,64 @@
[(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator VI4_AVX10_2
[(V16SI "TARGET_AVX10_2") V8SI V4SI])
(define_mode_iterator VI48_AVX512F_AVX512VL
- [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512")
+ [V4SI V8SI (V16SI "TARGET_AVX512F")
(V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V8DI "TARGET_AVX512F")])
(define_mode_iterator VI2_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
(define_mode_iterator VI2HFBF_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") (V32HF "TARGET_EVEX512")
- (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") (V32BF "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") V32HF
+ (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") V32BF])
(define_mode_iterator VI2H_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8SI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8SI "TARGET_AVX512VL") V16SI V8DI])
(define_mode_iterator VI1_AVX512VL_F
- [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F && TARGET_EVEX512")])
+ [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
(define_mode_iterator VI8_AVX2_AVX512BW
- [(V8DI "TARGET_AVX512BW && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
(define_mode_iterator VI4_128_8_256
[V4SI V4DI])
;; All V8D* modes
(define_mode_iterator V8FI
- [(V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V8DF V8DI])
;; All V16S* modes
(define_mode_iterator V16FI
- [(V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [V16SF V16SI])
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2_AVX512BW
- [(V4TI "TARGET_AVX512BW && TARGET_EVEX512") (V2TI "TARGET_AVX2") V1TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Suppose TARGET_AVX512BW as baseline
(define_mode_iterator VIMAX_AVX512VL
- [(V4TI "TARGET_EVEX512") (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+ [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
(define_mode_iterator VIMAX_AVX2
[(V2TI "TARGET_AVX2") V1TI])
@@ -736,17 +700,17 @@
(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI12_AVX2_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI24_AVX2
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2
[(V32QI "TARGET_AVX2") V16QI
@@ -754,17 +718,17 @@
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI248_AVX512VL
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V32HI V16SI V8DI
(V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI248_AVX512VLBW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI48_AVX2
[(V8SI "TARGET_AVX2") V4SI
@@ -776,17 +740,16 @@
(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512BW && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW") V16SI V8DI])
(define_mode_iterator VI248_AVX512BW_AVX512VL
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW")
+ (V4DI "TARGET_AVX512VL") V16SI V8DI])
;; Suppose TARGET_AVX512VL as baseline
(define_mode_iterator VI248_AVX512BW_1
@@ -800,16 +763,16 @@
V4DI V2DI])
(define_mode_iterator VI48_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") V8SI V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI V2DI])
+ [(V16SI "TARGET_AVX512F") V8SI V4SI
+ (V8DI "TARGET_AVX512F") V4DI V2DI])
(define_mode_iterator VI48_AVX_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI12_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
(define_mode_iterator V48_128_256
[V4SF V2DF
@@ -950,10 +913,10 @@
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI248_256 [V16HI V8SI V4DI])
(define_mode_iterator VI248_512
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V32HI V16SI V8DI])
(define_mode_iterator VI48_128 [V4SI V2DI])
(define_mode_iterator VI148_512
- [(V64QI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V64QI V16SI V8DI])
(define_mode_iterator VI148_256 [V32QI V8SI V4DI])
(define_mode_iterator VI148_128 [V16QI V4SI V2DI])
@@ -961,75 +924,62 @@
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI124_256_AVX512F_AVX512BW
[V32QI V16HI V8SI
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
+ (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
(define_mode_iterator VI_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
(define_mode_iterator VIHFBF_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512")
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")])
;; Int-float size matches
(define_mode_iterator VI2F_256_512
- [V16HI (V32HI "TARGET_EVEX512")
- V16HF (V32HF "TARGET_EVEX512")
- V16BF (V32BF "TARGET_EVEX512")])
+ [V16HI V32HI V16HF V32HF V16BF V32BF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
(define_mode_iterator VI4F_256_512
- [V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")])
+ [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512
[V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_mode_iterator VF48H_AVX512VL
- [(V8DF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V8DF V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_iterator VF48_128
[V2DF V4SF])
(define_mode_iterator VI48F
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VI12_VI48F_AVX512VL
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
(define_mode_iterator V8_128 [V8HI V8HF V8BF])
(define_mode_iterator V16_256 [V16HI V16HF V16BF])
(define_mode_iterator V32_512
- [(V32HI "TARGET_EVEX512") (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V32HI V32HF V32BF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -1441,7 +1391,7 @@
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SI V16SF V8DF])
;; Mapping for dbpsabbw modes
(define_mode_attr dbpsadbwmode
@@ -1639,6 +1589,44 @@
"&& 1"
[(set (match_dup 0) (match_dup 1))])
+(define_insn_and_split "*<avx512>_load<mode>mask_and15"
+ [(set (match_operand:V48_AVX512VL_4 0 "register_operand" "=v")
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4
+ [(match_operand:V48_AVX512VL_4 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V48_AVX512VL_4 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 15))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
+(define_insn_and_split "*<avx512>_load<mode>mask_and3"
+ [(set (match_operand:V8_AVX512VL_2 0 "register_operand" "=v")
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2
+ [(match_operand:V8_AVX512VL_2 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V8_AVX512VL_2 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 3))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
(define_expand "<avx512>_load<mode>_mask"
[(set (match_operand:VI12_AVX512VL 0 "register_operand")
(vec_merge:VI12_AVX512VL
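The two define_insn_and_split patterns added above recognize a masked load whose QImode mask is ANDed with 15 (four-element modes) or 3 (two-element modes); since the hardware only consumes that many mask bits anyway, the split re-emits the ordinary masked load with the AND dropped. One way such an AND can appear in user code is sketched below (whether it survives to RTL in exactly this shape depends on earlier passes; _mm256_maskz_loadu_pd is a real AVX-512VL intrinsic):

    /* mload.c -- compile with: gcc -O2 -mavx512f -mavx512vl -c mload.c  */
    #include <immintrin.h>

    __m256d
    load4 (__mmask8 k, const double *p)
    {
      /* Only the low 4 mask bits matter for a 4-element load, so the
         explicit masking with 0xf is redundant.  */
      return _mm256_maskz_loadu_pd (k & 0xf, p);
    }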
@@ -2049,11 +2037,9 @@
(define_mode_iterator STORENT_MODE
[(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
(define_expand "storent<mode>"
[(set (match_operand:STORENT_MODE 0 "memory_operand")
@@ -2857,10 +2843,10 @@
})
(define_expand "div<mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (div:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "vector_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (div:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "vector_operand")))]
"TARGET_AVX10_2"
{
if (TARGET_RECIP_VEC_DIV
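The div<mode>3 expander now runs over the plain VBF iterator (V8BF/V16BF/V32BF) while staying gated on TARGET_AVX10_2. A hedged sketch of source that reaches it, using GNU vector extensions rather than intrinsics (file name illustrative; the exact code generated depends on the remaining options):

    /* bfdiv.c -- compile with: gcc -O2 -mavx10.2 -c bfdiv.c  */
    typedef __bf16 v32bf __attribute__ ((vector_size (64)));

    v32bf
    bf_div (v32bf a, v32bf b)
    {
      return a / b;   /* element-wise BF16 divide, 32 lanes */
    }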
@@ -3897,15 +3883,12 @@
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -3948,13 +3931,11 @@
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(smaxmin:REDUC_SMINMAX_MODE
@@ -4063,10 +4044,8 @@
(define_mode_iterator REDUC_ANY_LOGIC_MODE
[(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(any_logic:REDUC_ANY_LOGIC_MODE
@@ -4410,7 +4389,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
@@ -4428,7 +4407,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
UNSPEC_PCMP)))
(set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
(unspec:<V48H_AVX512VL:avx512fmaskmode>
@@ -4469,7 +4448,8 @@
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8
+ && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4480,6 +4460,70 @@
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_insn "*<avx512>_cmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v")
+ (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512vl_ucmpv2di3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimmediate_operand" "v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
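The *_cmp<mode>3_and15/_and3 and *_ucmp*_and* insns added above match a compare whose QImode mask result is ANDed with 15 (four-element vectors) or 3 (two-element vectors); the AND is folded away because vcmp/vpcmp on such a vector can only set that many mask bits in the first place. A sketch of source that produces this shape (_mm_cmpgt_epi32_mask is a real AVX-512VL intrinsic; the masking with 0xf is exactly the redundancy being removed):

    /* cmp4.c -- compile with: gcc -O2 -mavx512f -mavx512vl -c cmp4.c  */
    #include <immintrin.h>

    __mmask8
    cmp4 (__m128i a, __m128i b)
    {
      /* Only bits 0..3 of the result can ever be set.  */
      return _mm_cmpgt_epi32_mask (a, b) & 0xf;
    }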
@@ -4762,7 +4806,8 @@
(match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_7_operand")]
UNSPEC_UNSIGNED_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && GET_MODE_NUNITS (<MODE>mode) >= 8"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4923,8 +4968,8 @@
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(match_operator:<avx512fmaskmode> 1 ""
- [(match_operand:VBF_AVX10_2 2 "register_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))]
+ [(match_operand:VBF 2 "register_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")]))]
"TARGET_AVX10_2"
{
bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
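The vcond_mask expanders changed in the hunks that follow now also accept an all-ones vector constant for the value selected when the condition is true (via the new vector_or_0_or_1s_operand / nonimm_or_0_or_1s_operand predicates), so that constant no longer has to be forced into a register first. A sketch of the kind of scalar source whose vectorization produces such a select (how it is ultimately expanded depends on the target options):

    /* sel.c -- compile with: gcc -O2 -mavx2 -c sel.c  */
    void
    sel (int *restrict r, const int *restrict a,
         const int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        r[i] = a[i] < 0 ? -1 : b[i];   /* -1 is the all-ones constant */
    }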
@@ -5142,7 +5187,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
+ (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
(match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5155,7 +5200,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_operand")
+ (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
(match_operand:VI_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
@@ -5168,7 +5213,7 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_operand")
+ (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
(match_operand:V1TI 2 "nonimm_or_0_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
@@ -5181,7 +5226,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimmediate_operand")
+ (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
(match_operand:VF_256 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5194,7 +5239,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_operand")
+ (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
(match_operand:VF_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
@@ -5573,7 +5618,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5630,7 +5675,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx_noavx512vl,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx_noavx512f,avx512vl,avx512f")
(set_attr "addr" "*,gpr16,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
@@ -5703,7 +5748,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5765,7 +5810,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -5811,15 +5856,10 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
- (V8BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V32BF "TARGET_AVX10_2")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (HF "TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32HF "TARGET_AVX512FP16")
+ (V8BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") (V32BF "TARGET_AVX10_2")])
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -5857,8 +5897,7 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
(define_mode_iterator FMAMODE
[SF DF V4SF V2DF V8SF V4DF])
@@ -5928,14 +5967,12 @@
;; Suppose AVX-512F as baseline
(define_mode_iterator VFH_SF_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(HF "TARGET_AVX512FP16")
- SF (V16SF "TARGET_EVEX512")
- (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- DF (V8DF "TARGET_EVEX512")
- (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
@@ -8683,7 +8720,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8751,7 +8788,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8761,7 +8798,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_fix:V16SI
(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9349,7 +9386,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9385,7 +9422,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9544,7 +9581,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9554,7 +9591,7 @@
[(set (match_operand:V8SI 0 "register_operand" "=v")
(any_fix:V8SI
(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10070,7 +10107,7 @@
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10232,7 +10269,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10438,7 +10475,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float_extend:V8DF
(match_dup 2)))]
-"TARGET_AVX512F && TARGET_EVEX512"
+"TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SFmode);")
(define_expand "vec_unpacks_lo_v4sf"
@@ -10576,7 +10613,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float:V8DF
(match_dup 2)))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SImode);")
(define_expand "vec_unpacks_float_lo_v16si"
@@ -10588,7 +10625,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
@@ -10684,7 +10721,7 @@
(define_expand "vec_unpacku_float_hi_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[4];
@@ -10733,7 +10770,7 @@
(define_expand "vec_unpacku_float_lo_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[3];
@@ -10827,7 +10864,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -10942,7 +10979,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -11135,7 +11172,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11223,7 +11260,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11363,7 +11400,7 @@
(const_int 11) (const_int 11)
(const_int 13) (const_int 13)
(const_int 15) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -11416,7 +11453,7 @@
(const_int 10) (const_int 10)
(const_int 12) (const_int 12)
(const_int 14) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -12376,9 +12413,7 @@
(V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")])
(define_mode_iterator AVX512_VEC
- [(V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
[(match_operand:<ssequartermode> 0 "nonimmediate_operand")
@@ -12547,9 +12582,7 @@
[(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
(define_mode_iterator AVX512_VEC_2
- [(V16SF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
@@ -13110,7 +13143,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
if (TARGET_AVX512VL
@@ -13159,7 +13192,7 @@
(const_int 58) (const_int 59)
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
@@ -13257,15 +13290,15 @@
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
(define_expand "vec_extract<mode><ssescalarmodelower>"
[(match_operand:<ssescalarmode> 0 "register_operand")
@@ -13307,7 +13340,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13421,9 +13454,9 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -13437,7 +13470,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13454,7 +13487,7 @@
(const_int 2) (const_int 6)])))]
"TARGET_AVX && <mask_avx512vl_condition>"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "V4DF")])
@@ -13649,7 +13682,7 @@
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
/* Disallow embedded broadcast for vector HFmode since
it's not real AVX512FP16 instruction. */
&& (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
@@ -13731,7 +13764,7 @@
[(set (match_operand:V 0 "register_operand")
(match_operand:V 1 "ternlog_operand"))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -13761,7 +13794,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13846,7 +13879,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13930,7 +13963,7 @@
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -14080,7 +14113,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V16SF 4 "register_operand")
(match_operand:HI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
@@ -14267,7 +14300,7 @@
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_28_to_31_operand")
(match_operand 18 "const_28_to_31_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
&& INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
&& INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
@@ -14302,7 +14335,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V8DF 4 "register_operand")
(match_operand:QI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
@@ -14332,7 +14365,7 @@
(match_operand 8 "const_12_to_13_operand")
(match_operand 9 "const_6_to_7_operand")
(match_operand 10 "const_14_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask;
mask = INTVAL (operands[3]);
@@ -14464,7 +14497,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14514,7 +14547,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14880,7 +14913,7 @@
(set_attr "mode" "V2DF,DF,V8DF")
(set (attr "enabled")
(cond [(eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(match_test "<mask_avx512vl_condition>")
(const_string "*")
@@ -14965,13 +14998,13 @@
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
(truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
(any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -14993,7 +15026,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15018,7 +15051,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15102,7 +15135,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15118,7 +15151,7 @@
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
(match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15132,19 +15165,19 @@
(match_operand:<pmov_src_mode> 1 "register_operand"))
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "truncv32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand")
(truncate:V32QI
(match_operand:V32HI 1 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "avx512bw_<code>v32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
(any_truncate:V32QI
(match_operand:V32HI 1 "register_operand" "v,v")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15174,7 +15207,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512VBMI && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512VBMI && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15190,7 +15223,7 @@
(match_operand:V32HI 1 "register_operand" "v,v"))
(match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
(match_operand:SI 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15204,7 +15237,7 @@
(match_operand:V32HI 1 "register_operand"))
(match_dup 0)
(match_operand:SI 2 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_mode_iterator PMOV_DST_MODE_2
[V4SI V8HI (V16QI "TARGET_AVX512BW")])
@@ -16062,7 +16095,7 @@
[(set (match_operand:V8QI 0 "register_operand")
(truncate:V8QI
(match_operand:V8DI 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx op0 = gen_reg_rtx (V16QImode);
@@ -16082,7 +16115,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16092,7 +16125,7 @@
[(set (match_operand:V8QI 0 "memory_operand" "=m")
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand" "v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16104,7 +16137,7 @@
(subreg:DI
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand")) 0))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -16128,7 +16161,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16149,7 +16182,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16162,7 +16195,7 @@
(match_operand:V8DI 1 "register_operand" "v"))
(match_dup 0)
(match_operand:QI 2 "register_operand" "Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16174,7 +16207,7 @@
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand"))
(match_operand:QI 2 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
operands[0] = adjust_address_nv (operands[0], V8QImode, 0);
emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0],
@@ -16431,7 +16464,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
@@ -16451,7 +16484,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16547,7 +16580,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
@@ -16567,7 +16600,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16969,7 +17002,7 @@
"TARGET_SSE2"
{
/* Try with vnni instructions. */
- if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI && TARGET_EVEX512)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI)
|| (<MODE_SIZE> < 64
&& ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI)))
{
@@ -17062,7 +17095,7 @@
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "nonimmediate_operand")
(match_operand:V16SI 3 "nonimmediate_operand")]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
{
rtx t1 = gen_reg_rtx (V8DImode);
rtx t2 = gen_reg_rtx (V16SImode);
@@ -18300,13 +18333,10 @@
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
(V16HF "TARGET_AVX512FP16")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")
+ (V32HF "TARGET_AVX512FP16")])
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -18333,7 +18363,7 @@
{
operands[2] = CONSTM1_RTX (<MODE>mode);
- if (!TARGET_AVX512F || (!TARGET_AVX512VL && !TARGET_EVEX512))
+ if (!TARGET_AVX512F)
operands[2] = force_reg (<MODE>mode, operands[2]);
})
@@ -18342,7 +18372,6 @@
(xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br")
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))]
"TARGET_AVX512F
- && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)
&& (!<mask_applied>
|| <ssescalarmode>mode == SImode
|| <ssescalarmode>mode == DImode)"
@@ -18409,7 +18438,7 @@
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))
(unspec [(match_operand:VI 3 "register_operand" "0,0,0")]
UNSPEC_INSN_FALSE_DEP)]
- "TARGET_AVX512F && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)"
+ "TARGET_AVX512F"
{
if (TARGET_AVX512VL)
return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
@@ -18433,7 +18462,7 @@
(not:<ssescalarmode>
(match_operand:<ssescalarmode> 1 "nonimmediate_operand"))))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 0)
(xor:VI48_AVX512F
(vec_duplicate:VI48_AVX512F (match_dup 1))
@@ -18587,8 +18616,7 @@
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(eq_attr "alternative" "4")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
]
(const_string "*")))])
@@ -18632,7 +18660,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18647,7 +18675,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18941,7 +18969,7 @@
(match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr"))
(match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -18974,7 +19002,7 @@
(match_operand:VI 1 "bcst_vector_operand" "%0, 0")
(match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19005,7 +19033,7 @@
(not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0"))
(not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19027,7 +19055,7 @@
(const_string "*")))])
(define_mode_iterator AVX512ZEXTMASK
- [(DI "TARGET_AVX512BW && TARGET_EVEX512") (SI "TARGET_AVX512BW") HI])
+ [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
@@ -19276,7 +19304,7 @@
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19345,7 +19373,7 @@
(const_int 14) (const_int 15)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19407,7 +19435,7 @@
(const_int 61) (const_int 125)
(const_int 62) (const_int 126)
(const_int 63) (const_int 127)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19503,7 +19531,7 @@
(const_int 53) (const_int 117)
(const_int 54) (const_int 118)
(const_int 55) (const_int 119)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19727,7 +19755,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19782,7 +19810,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20488,7 +20516,7 @@
(match_operand:SI 2 "const_0_to_255_operand")
(match_operand:V16SI 3 "register_operand")
(match_operand:HI 4 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
@@ -20532,7 +20560,7 @@
(match_operand 15 "const_12_to_15_operand")
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_12_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& INTVAL (operands[2]) + 4 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 4 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 4 == INTVAL (operands[8])
@@ -20698,7 +20726,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFLW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20874,7 +20902,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFHW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -21408,7 +21436,7 @@
(match_operand:V4TI 1 "register_operand" "v")
(parallel
[(match_operand:SI 2 "const_0_to_3_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
@@ -21416,7 +21444,7 @@
(set_attr "mode" "XI")])
(define_mode_iterator VEXTRACTI128_MODE
- [(V4TI "TARGET_AVX512F && TARGET_EVEX512") V2TI])
+ [(V4TI "TARGET_AVX512F") V2TI])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand")
@@ -21439,7 +21467,7 @@
&& VECTOR_MODE_P (GET_MODE (operands[1]))
&& ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
|| (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
- || (TARGET_AVX512F && TARGET_EVEX512
+ || (TARGET_AVX512F
&& GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
&& (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
[(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
@@ -22814,7 +22842,7 @@
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix" "evex")
@@ -23328,10 +23356,10 @@
;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
;; modes for abs instruction on pre AVX-512 targets.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
@@ -24159,7 +24187,7 @@
[(set (match_operand:V32HI 0 "register_operand" "=v")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24173,7 +24201,7 @@
(match_operand:V64QI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24193,7 +24221,7 @@
(match_operand:V64QI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24206,7 +24234,7 @@
[(set (match_operand:V32HI 0 "register_operand")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
@@ -24354,7 +24382,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24364,7 +24392,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
@@ -24497,7 +24525,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24507,7 +24535,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
[(set (match_operand:V32HI 0 "register_operand" "=v")
@@ -24517,7 +24545,7 @@
(match_operand:V32HI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
@@ -24741,7 +24769,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24751,7 +24779,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8QI 1 "memory_operand" "m")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24769,7 +24797,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -24780,7 +24808,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
if (!MEM_P (operands[1]))
{
@@ -24922,7 +24950,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24932,7 +24960,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25059,7 +25087,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -25073,7 +25101,7 @@
(match_operand:V16SI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25092,7 +25120,7 @@
(match_operand:V16SI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25104,7 +25132,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4siv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25505,7 +25533,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V16SF 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_15_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx tmp = gen_reg_rtx (V16SFmode);
emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
@@ -26723,7 +26751,7 @@
(ashiftrt:V8DI
(match_operand:V8DI 1 "register_operand")
(match_operand:V8DI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv4di3"
[(set (match_operand:V4DI 0 "register_operand")
@@ -26814,7 +26842,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
(match_operand:V16SI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv8si3"
[(set (match_operand:V8SI 0 "register_operand")
@@ -27257,12 +27285,12 @@
(set_attr "mode" "OI")])
(define_mode_attr pbroadcast_evex_isa
- [(V64QI "avx512bw_512") (V32QI "avx512bw") (V16QI "avx512bw")
- (V32HI "avx512bw_512") (V16HI "avx512bw") (V8HI "avx512bw")
- (V16SI "avx512f_512") (V8SI "avx512f") (V4SI "avx512f")
- (V8DI "avx512f_512") (V4DI "avx512f") (V2DI "avx512f")
- (V32HF "avx512bw_512") (V16HF "avx512bw") (V8HF "avx512bw")
- (V32BF "avx512bw_512") (V16BF "avx512bw") (V8BF "avx512bw")])
+ [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+ (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+ (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+ (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")
+ (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")
+ (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")])
(define_insn "avx2_pbroadcast<mode>"
[(set (match_operand:VIHFBF 0 "register_operand" "=x,v")
@@ -27806,7 +27834,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv4si"
@@ -27834,7 +27862,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv2di"
@@ -27849,7 +27877,7 @@
%vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
[(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx")
- (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov")
+ (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov")
(set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig")
(set (attr "mode")
(cond [(and (eq_attr "alternative" "2")
@@ -27865,8 +27893,7 @@
(if_then_else
(eq_attr "alternative" "2")
(symbol_ref "TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
(const_string "*")))])
(define_insn "avx2_vbroadcasti128_<mode>"
@@ -27946,7 +27973,7 @@
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_evex")
- (set_attr "isa" "avx2,noavx2,avx2,avx512f_512,noavx2")
+ (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
(set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
(define_split
@@ -28010,8 +28037,8 @@
;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
(define_mode_iterator VI4F_BRCST32x2
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_attr 64x2mode
[(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
@@ -28061,8 +28088,7 @@
;; For broadcast[i|f]64x2
(define_mode_iterator VI8F_BRCST64x2
- [(V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
[(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
@@ -28118,27 +28144,26 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VPERMI2
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
(define_mode_iterator VPERMI2I
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V16SI V8DI
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
@@ -28813,29 +28838,28 @@
;; Modes handled by vec_init expanders.
(define_mode_iterator VEC_INIT_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Likewise, but for initialization from half sized vectors.
;; Thus, these are all VEC_INIT_MODE modes except V2??.
(define_mode_iterator VEC_INIT_HALF_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
+ (V4TI "TARGET_AVX512F")])
(define_expand "vec_init<mode><ssescalarmodelower>"
[(match_operand:VEC_INIT_MODE 0 "register_operand")
@@ -29096,7 +29120,7 @@
(unspec:V16SF
[(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTPH2PS))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29186,7 +29210,7 @@
UNSPEC_VCVTPS2PH)
(match_operand:V16HI 3 "nonimm_or_0_operand")
(match_operand:HI 4 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int round = INTVAL (operands[2]);
/* Separate {sae} from rounding control imm,
@@ -29205,7 +29229,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29217,7 +29241,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -30196,7 +30220,7 @@
(match_operand:V8DI 2 "register_operand" "v")
(match_operand:V8DI 3 "nonimmediate_operand" "vm")]
VPMADD52))]
- "TARGET_AVX512IFMA && TARGET_EVEX512"
+ "TARGET_AVX512IFMA"
"vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "prefix" "evex")
@@ -30567,7 +30591,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30636,7 +30660,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30705,7 +30729,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30774,7 +30798,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30930,8 +30954,7 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VI48_AVX512VP2VL
- [(V8DI "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator MASK_DWI [P2QI P2HI])
@@ -30973,12 +30996,12 @@
(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
(match_operand:V16SI 2 "vector_operand" "vm")]
UNSPEC_VP2INTERSECT))]
- "TARGET_AVX512VP2INTERSECT && TARGET_EVEX512"
+ "TARGET_AVX512VP2INTERSECT"
"vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr ("prefix") ("evex"))])
(define_mode_iterator VF_AVX512BF16VL
- [(V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
[(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")])
@@ -31098,7 +31121,7 @@
"vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
(define_mode_iterator VF1_AVX512_256
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL")])
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
[(match_operand:<sf_cvt_bf16> 0 "register_operand")
@@ -31144,7 +31167,7 @@
[(set (match_operand:V16BF 0 "register_operand")
(float_truncate:V16BF
(match_operand:V16SF 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512
+ "TARGET_AVX512BW
&& !HONOR_NANS (BFmode) && !flag_rounding_math
&& (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
{
@@ -31428,10 +31451,10 @@
;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
(define_mode_iterator INT_BROADCAST_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512 && TARGET_64BIT")
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F && TARGET_64BIT")
(V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
;; Broadcast from an integer. NB: Enable broadcast only if we can move
@@ -31705,8 +31728,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (vec_concat:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (vec_concat:VHF_AVX512VL
(float_truncate:<ssehalfvecmode>
(match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>"))
(float_truncate:<ssehalfvecmode>
@@ -31730,8 +31753,8 @@
(define_insn "vcvt<convertfp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode> 0 "register_operand" "=v")
(unspec:<ssebvecmode>
- [(match_operand:VHF_AVX10_2 1 "register_operand" "v")
- (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VHF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_CONVERTFP8_PACK))]
"TARGET_AVX10_2"
"vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand2>, %1, %2}"
@@ -31814,7 +31837,7 @@
[(set_attr "prefix" "evex")])
(define_mode_iterator VHF_AVX10_2_2
- [(V32HF "TARGET_AVX10_2") V16HF])
+ [V32HF V16HF])
(define_insn "vcvt<biasph2fp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v")
@@ -31911,8 +31934,8 @@
[(set_attr "prefix" "evex")])
(define_insn "vcvthf82ph<mode><mask_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (unspec:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VHF_AVX512VL
[(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")]
UNSPEC_VCVTHF82PH))]
"TARGET_AVX10_2"
@@ -31934,8 +31957,8 @@
(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -31952,8 +31975,8 @@
(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -32032,23 +32055,23 @@
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>_mask"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -32057,10 +32080,10 @@
[(set_attr "prefix" "evex")])
(define_expand "vdpphps_<mode>_maskz"
- [(match_operand:VF1_AVX10_2 0 "register_operand")
- (match_operand:VF1_AVX10_2 1 "register_operand")
- (match_operand:VF1_AVX10_2 2 "register_operand")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VF1_AVX512VL 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:VF1_AVX512VL 2 "register_operand")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32070,60 +32093,60 @@
})
(define_insn "vdpphps_<mode>_maskz_1"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
- (match_operand:VF1_AVX10_2 4 "const0_operand" "C")
+ (match_operand:VF1_AVX512VL 4 "const0_operand" "C")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_scalefbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")]
UNSPEC_VSCALEFBF16))]
"TARGET_AVX10_2"
"vscalefbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "<code><mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")))]
"TARGET_AVX10_2")
(define_insn "avx10_2_<code>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_<insn>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (plusminusmultdiv:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (plusminusmultdiv:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "avx10_2_fmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32135,11 +32158,11 @@
})
(define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32150,12 +32173,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32167,12 +32190,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32182,10 +32205,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32197,12 +32220,12 @@
})
(define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32213,13 +32236,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32231,13 +32254,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32247,10 +32270,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32262,12 +32285,12 @@
})
(define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32278,13 +32301,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32296,13 +32319,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32312,10 +32335,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32327,13 +32350,13 @@
})
(define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32344,14 +32367,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32363,14 +32386,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32380,35 +32403,35 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_rsqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT))]
"TARGET_AVX10_2"
"vrsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_sqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (sqrt:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (sqrt:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"vsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_rcpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP))]
"TARGET_AVX10_2"
"vrcpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_getexpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_GETEXP))]
"TARGET_AVX10_2"
"vgetexpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32425,9 +32448,9 @@
(UNSPEC_VGETMANTBF16 "getmant")])
(define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
BF16IMMOP))]
"TARGET_AVX10_2"
@@ -32437,7 +32460,7 @@
(define_insn "avx10_2_fpclassbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand 2 "const_0_to_255_operand")]
UNSPEC_VFPCLASSBF16))]
"TARGET_AVX10_2"
@@ -32447,8 +32470,8 @@
(define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
(match_operand 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX10_2"
@@ -32486,7 +32509,7 @@
(define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs<mode><mask_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")]
+ [(match_operand:VBF 1 "vector_operand" "vm")]
UNSPEC_CVT_BF16_IBS_ITER))]
"TARGET_AVX10_2"
"vcvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32501,7 +32524,7 @@
(define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32516,7 +32539,7 @@
(define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32531,7 +32554,7 @@
(define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32546,7 +32569,7 @@
(define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32565,7 +32588,7 @@
(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXSI>
- [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32576,7 +32599,7 @@
(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXDI>
- [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32585,8 +32608,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v")
- (unspec:VI8_AVX10_2
+ [(set (match_operand:VI8 0 "register_operand" "=v")
+ (unspec:VI8
[(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
@@ -32622,10 +32645,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_minmaxbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "bcst_vector_operand" "vmBr")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAXBF16))]
"TARGET_AVX10_2"
@@ -32634,10 +32657,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v")
- (unspec:VFH_AVX10_2
- [(match_operand:VFH_AVX10_2 1 "register_operand" "v")
- (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
+ (unspec:VFH_AVX512VL
+ [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ (match_operand:VFH_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAX))]
"TARGET_AVX10_2"
@@ -32661,9 +32684,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>"
- [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v")
- (unspec:VI1248_AVX10_2
- [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")]
+ [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand" "=v")
+ (unspec:VI1248_AVX512VLBW
+ [(match_operand:VI1248_AVX512VLBW 1 "memory_operand" "m")]
UNSPEC_VMOVRS))]
"TARGET_AVX10_2 && TARGET_MOVRS"
"vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 15d8e96..64f3c20 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B)
#endif /* __DISABLE_VAES__ */
-#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VAES__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vaes,avx512f,evex512")
+#pragma GCC target("vaes,avx512f")
#define __DISABLE_VAESF__
#endif /* __VAES__ */
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index 2b36c37..a02ab38 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _VPCLMULQDQINTRIN_H_INCLUDED
#define _VPCLMULQDQINTRIN_H_INCLUDED
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx512f,evex512")
+#pragma GCC target("vpclmulqdq,avx512f")
#define __DISABLE_VPCLMULQDQF__
#endif /* __VPCLMULQDQF__ */
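
The two hunks above relax the guards in vaesintrin.h and vpclmulqdqintrin.h: the 512-bit VAES and VPCLMULQDQ intrinsics no longer require the removed __EVEX512__/"evex512" feature, only AVX512F. A minimal usage sketch, assuming the usual _mm512_aesenc_epi128 and _mm512_clmulepi64_epi128 intrinsics and compiling with something like -mvaes -mvpclmulqdq -mavx512f (no evex512 option needed):

#include <immintrin.h>

/* Sketch only, not part of the patch: one AES round plus a carry-less
   multiply on 512-bit vectors, both now reachable without evex512.  */
__m512i
mix512 (__m512i state, __m512i key, __m512i a, __m512i b)
{
  __m512i enc = _mm512_aesenc_epi128 (state, key);      /* VAES */
  __m512i clm = _mm512_clmulepi64_epi128 (a, b, 0x00);  /* VPCLMULQDQ */
  return _mm512_xor_si512 (enc, clm);                   /* AVX512F */
}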
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb73..c8603b9 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
in 128bit, 256bit and 512bit */
4, 4, 6, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */
COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -121,16 +122,24 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
+
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -219,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -243,6 +253,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -330,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
@@ -356,6 +374,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -443,6 +468,7 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -467,6 +493,13 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -547,6 +580,7 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -571,6 +605,13 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -666,6 +707,7 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -690,6 +732,13 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -776,6 +825,7 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -800,6 +850,13 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -886,6 +943,7 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -913,6 +971,13 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1002,6 +1067,7 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1027,6 +1093,13 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1120,6 +1193,7 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1150,6 +1224,13 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1251,6 +1332,7 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1281,6 +1363,13 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1374,6 +1463,7 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
16, /* cost of moving SSE register to integer. */
+ 16, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
@@ -1405,6 +1495,13 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1518,6 +1615,7 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1553,6 +1651,14 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
 reassociation may make sense up to width of 6. SPEC2k6 benchmarks suggest
@@ -1677,6 +1783,7 @@ struct processor_costs znver2_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1712,6 +1819,13 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
 multiplications, thus reassociation may make sense up to width of 6.
@@ -1812,6 +1926,7 @@ struct processor_costs znver3_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
throughput 9. Approx 7 uops do not depend on vector size and every load
is 4 uops. */
@@ -1847,6 +1962,13 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
 multiplications, thus reassociation may make sense up to width of 6.
@@ -1949,6 +2071,7 @@ struct processor_costs znver4_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
throughput 5. Approx 7 uops do not depend on vector size and every load
is 5 uops. */
@@ -1984,6 +2107,14 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
 multiplications, thus reassociation may make sense up to width of 6.
@@ -2089,6 +2220,7 @@ struct processor_costs znver5_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* TODO: gather and scatter instructions are currently disabled in
x86-tune.def. In some cases they are however a win, see PR116582
@@ -2135,6 +2267,13 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
 /* DIVSD has throughput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2250,6 +2389,7 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2274,6 +2414,13 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2379,6 +2526,7 @@ struct processor_costs icelake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2403,6 +2551,13 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2502,6 +2657,7 @@ struct processor_costs alderlake_cost = {
 {8, 8, 8, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2526,6 +2682,13 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2618,6 +2781,7 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2642,6 +2806,13 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2731,6 +2902,7 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2755,6 +2927,13 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2843,6 +3022,7 @@ struct processor_costs pentium4_cost = {
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2867,6 +3047,13 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -2958,6 +3145,7 @@ struct processor_costs nocona_cost = {
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2982,6 +3170,13 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3071,6 +3266,7 @@ struct processor_costs atom_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3095,6 +3291,13 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3184,6 +3387,7 @@ struct processor_costs slm_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3208,6 +3412,13 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3309,6 +3520,7 @@ struct processor_costs tremont_cost = {
 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3335,6 +3547,13 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3349,119 +3568,6 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (2), /* Branch mispredict scale. */
};
-static stringop_algs intel_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs intel_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs intel_cost = {
- {
- /* Start of register allocator costs. integer->integer move cost is 2. */
- 6, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 6}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
- 4, 4, /* SSE->integer and integer->SSE moves */
- 4, 4, /* mask->integer and integer->mask moves */
- {4, 4, 4}, /* cost of loading mask register
- in QImode, HImode, SImode. */
- {6, 6, 6}, /* cost if storing mask register
- in QImode, HImode, SImode. */
- 2, /* cost of moving mask register. */
- /* End of register allocator costs. */
- },
-
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- {6, 6, 6, 6, 6}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- 4, /* cost of moving SSE register to integer. */
- 6, 6, /* Gather load static, per_elt. */
- 6, 6, /* Gather store static, per_elt. */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
-
- COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
- COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_INSNS (8), /* cost of MULSS instruction. */
- COSTS_N_INSNS (8), /* cost of MULSD instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
- 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- intel_memcpy,
- intel_memset,
- COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
- COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
- "16", /* Loop alignment. */
- "16:8:8", /* Jump alignment. */
- "0:0:8", /* Label alignment. */
- "16", /* Func alignment. */
- 4, /* Small unroll limit. */
- 2, /* Small unroll factor. */
- COSTS_N_INSNS (2), /* Branch mispredict scale. */
-};
-
/* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */
static stringop_algs lujiazui_memcpy[2] = {
{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
@@ -3532,15 +3638,16 @@ struct processor_costs lujiazui_cost = {
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers. */
+ {6, 6, 6}, /* cost of storing integer registers. */
{6, 6, 6, 10, 15}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
- 6, /* cost of moving SSE register to integer. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3566,6 +3673,13 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3658,6 +3772,7 @@ struct processor_costs yongfeng_cost = {
 {8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3682,6 +3797,13 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3774,6 +3896,7 @@ struct processor_costs shijidadao_cost = {
 {8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3798,6 +3921,13 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -3814,19 +3944,36 @@ struct processor_costs shijidadao_cost = {
-/* Generic should produce code tuned for Core-i7 (and newer chips)
- and btver1 (and newer chips). */
+/* Generic should produce code tuned for Haswell (and newer chips)
+ and znver1 (and newer chips):
+ 1. Don't align memory.
+ 2. For known sizes, prefer vector loop, unroll loop with 4 moves or
+ stores per iteration without aligning the loop, up to 256 bytes.
+ 3. For unknown sizes, use memcpy/memset.
+ 4. Since each loop iteration has 4 stores, and zeroing with the
+ unrolled loop may need 8 stores, change CLEAR_RATIO to 10 so that
+ zeroing of up to 72 bytes is fully unrolled with 9 stores without
+ SSE.
+ */
static stringop_algs generic_memcpy[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
static stringop_algs generic_memset[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
static const
struct processor_costs generic_cost = {
{
@@ -3883,7 +4030,7 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
+ 10, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -3896,6 +4043,7 @@ struct processor_costs generic_cost = {
 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3922,6 +4070,13 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4022,6 +4177,7 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2, /* cost of moving SSE register to integer. */
+ 2, /* cost of moving integer register to SSE. */
 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPS is 9 uops,
rec. throughput 6.
So 5 uops statically and one uops per load. */
@@ -4051,6 +4207,13 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index 685a83c..ff9c268 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -45,7 +45,6 @@ ix86_issue_rate (void)
case PROCESSOR_LAKEMONT:
case PROCESSOR_BONNELL:
case PROCESSOR_SILVERMONT:
- case PROCESSOR_INTEL:
case PROCESSOR_K6:
case PROCESSOR_BTVER2:
case PROCESSOR_PENTIUM4:
@@ -80,7 +79,17 @@ ix86_issue_rate (void)
case PROCESSOR_ALDERLAKE:
case PROCESSOR_YONGFENG:
case PROCESSOR_SHIJIDADAO:
+ case PROCESSOR_SIERRAFOREST:
+ case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
+ /* For znver5 the decoder can handle 4 or 8 instructions per cycle,
+ the op cache 12 instructions/cycle, dispatch 8 instructions,
+ integer rename 8 instructions and FP 6 instructions.
+
+ The scheduler, without understanding the out-of-order nature of the
+ CPU, is not going to be able to use more than 4 instructions, since
+ that is the limit of the decoders. */
+ case PROCESSOR_ZNVER5:
return 4;
case PROCESSOR_ICELAKE_CLIENT:
@@ -91,13 +100,14 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
- /* For znver5 decoder can handle 4 or 8 instructions per cycle,
- op cache 12 instruction/cycle, dispatch 8 instructions
- integer rename 8 instructions and Fp 6 instructions.
-
- The scheduler, without understanding out of order nature of the CPU
- is unlikely going to be able to fill all of these. */
- case PROCESSOR_ZNVER5:
+ case PROCESSOR_GRANITERAPIDS:
+ case PROCESSOR_GRANITERAPIDS_D:
+ case PROCESSOR_DIAMONDRAPIDS:
+ case PROCESSOR_GRANDRIDGE:
+ case PROCESSOR_CLEARWATERFOREST:
+ case PROCESSOR_ARROWLAKE:
+ case PROCESSOR_ARROWLAKE_S:
+ case PROCESSOR_PANTHERLAKE:
return 6;
default:
@@ -487,6 +497,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_HASWELL:
case PROCESSOR_TREMONT:
case PROCESSOR_ALDERLAKE:
+ case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
/* The stack engine allows push and pop instructions to execute in parallel. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
@@ -509,7 +520,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
break;
case PROCESSOR_SILVERMONT:
- case PROCESSOR_INTEL:
if (!reload_completed)
return cost;
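[Editorial note, not part of the patch: a small illustration of the stack-engine case kept in ix86_adjust_cost above, assuming an x86-64 target; the function name and the exact prologue are illustrative only. Back-to-back push (or pop) instructions depend on each other only through the implicit %rsp update, which the stack engine renames, so the hook can report that dependence as free and the scheduler may issue the pair in the same cycle.]

    extern int sink (int, int, int);

    /* Keeping a, b and c live across the call forces them into
       call-saved registers, so the prologue typically becomes a run of
       pushes (e.g. push %rbp; push %rbx; push %r12) whose only mutual
       dependence is the implicit %rsp adjustment.  */
    int
    uses_callee_saved (int a, int b, int c)
    {
      int x = sink (a, b, c);
      return x + a + b + c;
    }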
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c857e76..91cdca7 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -87,9 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
several insns to break false dependency on the dest register for GLC
micro-architecture. */
DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC,
- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS
- | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID
- | m_CORE_ATOM)
+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE)
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
@@ -574,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
"sse_movcc_use_blendv", ~m_CORE_ATOM)
+/* X86_TUNE_SSE_REDUCTION_PREFER_PSHUF: Prefer pshufd to reduce V16QI,
+ V8HI, V4SI, V4SF, V2DI modes when lshr is costlier. */
+DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF,
+ "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5)
+
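[Editorial note, not part of the patch: a sketch of the pshufd-style reduction this tunable prefers, written with SSE2 intrinsics for the V4SI case. Which instruction the compiler actually picks (pshufd versus a whole-register logical shift right such as psrldq) is exactly what the flag steers; the instruction mapping noted in the comments is the usual one, not something guaranteed by this patch.]

    #include <emmintrin.h>

    /* Horizontal add of a V4SI vector.  The upper lanes are brought down
       with pshufd (_mm_shuffle_epi32) rather than by shifting the whole
       register right, which the tunable's comment says is the costlier
       alternative on Zen 4 / Zen 5.  */
    static inline int
    hadd_v4si (__m128i v)
    {
      __m128i t = _mm_shuffle_epi32 (v, _MM_SHUFFLE (1, 0, 3, 2)); /* pshufd */
      v = _mm_add_epi32 (v, t);            /* lanes now hold 0+2 and 1+3 */
      t = _mm_shuffle_epi32 (v, _MM_SHUFFLE (2, 3, 0, 1));          /* pshufd */
      v = _mm_add_epi32 (v, t);            /* full sum lands in lane 0 */
      return _mm_cvtsi128_si32 (v);
    }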
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/