Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64-c.cc | 1
-rw-r--r-- gcc/config/aarch64/aarch64-cores.def | 4
-rw-r--r-- gcc/config/aarch64/aarch64-elf-metadata.cc | 145
-rw-r--r-- gcc/config/aarch64/aarch64-elf-metadata.h | 253
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h | 15
-rw-r--r-- gcc/config/aarch64/aarch64-simd-builtins.def | 5
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 58
-rw-r--r-- gcc/config/aarch64/aarch64-sme.md | 2
-rw-r--r-- gcc/config/aarch64/aarch64-speculation.cc | 11
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins.cc | 7
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md | 726
-rw-r--r-- gcc/config/aarch64/aarch64-sve2.md | 74
-rw-r--r-- gcc/config/aarch64/aarch64-vxworks.h | 7
-rw-r--r-- gcc/config/aarch64/aarch64.cc | 614
-rw-r--r-- gcc/config/aarch64/aarch64.md | 214
-rw-r--r-- gcc/config/aarch64/arm_acle.h | 14
-rw-r--r-- gcc/config/aarch64/constraints.md | 7
-rw-r--r-- gcc/config/aarch64/driver-aarch64.cc | 2
-rwxr-xr-x gcc/config/aarch64/gcc-auto-profile | 53
-rw-r--r-- gcc/config/aarch64/iterators.md | 132
-rw-r--r-- gcc/config/aarch64/predicates.md | 8
-rw-r--r-- gcc/config/aarch64/t-aarch64 | 11
-rw-r--r-- gcc/config/alpha/alpha.cc | 35
-rw-r--r-- gcc/config/arc/arc.cc | 5
-rw-r--r-- gcc/config/arm/aarch-common.cc | 3
-rw-r--r-- gcc/config/arm/aout.h | 5
-rw-r--r-- gcc/config/arm/arm-builtins.cc | 1279
-rw-r--r-- gcc/config/arm/arm-c.cc | 7
-rw-r--r-- gcc/config/arm/arm-cpus.in | 28
-rw-r--r-- gcc/config/arm/arm-generic.md | 4
-rw-r--r-- gcc/config/arm/arm-opts.h | 1
-rw-r--r-- gcc/config/arm/arm-protos.h | 8
-rw-r--r-- gcc/config/arm/arm-tables.opt | 6
-rw-r--r-- gcc/config/arm/arm-tune.md | 53
-rw-r--r-- gcc/config/arm/arm.cc | 441
-rw-r--r-- gcc/config/arm/arm.h | 175
-rw-r--r-- gcc/config/arm/arm.md | 43
-rw-r--r-- gcc/config/arm/arm.opt | 3
-rw-r--r-- gcc/config/arm/constraints.md | 18
-rw-r--r-- gcc/config/arm/iterators.md | 20
-rw-r--r-- gcc/config/arm/iwmmxt.md | 1766
-rw-r--r-- gcc/config/arm/iwmmxt2.md | 903
-rw-r--r-- gcc/config/arm/marvell-f-iwmmxt.md | 189
-rw-r--r-- gcc/config/arm/predicates.md | 11
-rw-r--r-- gcc/config/arm/t-arm | 3
-rw-r--r-- gcc/config/arm/thumb2.md | 2
-rw-r--r-- gcc/config/arm/types.md | 123
-rw-r--r-- gcc/config/arm/unspecs.md | 29
-rw-r--r-- gcc/config/arm/vec-common.md | 31
-rw-r--r-- gcc/config/avr/avr-passes.cc | 8
-rw-r--r-- gcc/config/avr/avr.cc | 12
-rw-r--r-- gcc/config/avr/avr.md | 39
-rw-r--r-- gcc/config/bfin/bfin.cc | 3
-rw-r--r-- gcc/config/c6x/c6x.cc | 3
-rw-r--r-- gcc/config/c6x/c6x.h | 6
-rw-r--r-- gcc/config/cris/cris.cc | 3
-rw-r--r-- gcc/config/cris/cris.md | 3
-rw-r--r-- gcc/config/csky/csky.cc | 3
-rw-r--r-- gcc/config/darwin.h | 1
-rw-r--r-- gcc/config/epiphany/resolve-sw-modes.cc | 3
-rw-r--r-- gcc/config/fr30/fr30.cc | 3
-rw-r--r-- gcc/config/frv/frv.cc | 12
-rw-r--r-- gcc/config/frv/frv.md | 15
-rw-r--r-- gcc/config/gcn/gcn-devices.def | 33
-rw-r--r-- gcc/config/gcn/gcn-opts.h | 13
-rw-r--r-- gcc/config/gcn/gcn-tables.opt | 9
-rw-r--r-- gcc/config/gcn/gcn-valu.md | 8
-rw-r--r-- gcc/config/gcn/gcn.cc | 37
-rw-r--r-- gcc/config/gcn/gcn.h | 2
-rw-r--r-- gcc/config/gcn/gcn.md | 172
-rw-r--r-- gcc/config/gcn/gcn.opt | 8
-rw-r--r-- gcc/config/gcn/mkoffload.cc | 3
-rw-r--r-- gcc/config/i386/avx10_2-512bf16intrin.h | 681
-rw-r--r-- gcc/config/i386/avx10_2-512convertintrin.h | 572
-rw-r--r-- gcc/config/i386/avx10_2-512mediaintrin.h | 514
-rw-r--r-- gcc/config/i386/avx10_2-512minmaxintrin.h | 489
-rw-r--r-- gcc/config/i386/avx10_2-512satcvtintrin.h | 1575
-rw-r--r-- gcc/config/i386/avx10_2bf16intrin.h | 614
-rw-r--r-- gcc/config/i386/avx10_2convertintrin.h | 530
-rw-r--r-- gcc/config/i386/avx10_2mediaintrin.h | 469
-rw-r--r-- gcc/config/i386/avx10_2minmaxintrin.h | 448
-rw-r--r-- gcc/config/i386/avx10_2satcvtintrin.h | 1779
-rw-r--r-- gcc/config/i386/avx512bf16intrin.h | 19
-rw-r--r-- gcc/config/i386/avx512bf16vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bitalgintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bitalgvlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bwintrin.h | 19
-rw-r--r-- gcc/config/i386/avx512cdintrin.h | 2
-rw-r--r-- gcc/config/i386/avx512dqintrin.h | 21
-rw-r--r-- gcc/config/i386/avx512fintrin.h | 32
-rw-r--r-- gcc/config/i386/avx512fp16intrin.h | 21
-rw-r--r-- gcc/config/i386/avx512fp16vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512ifmaintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512ifmavlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmi2intrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmi2vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmiintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmivlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vlbwintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vldqintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vlintrin.h | 6
-rw-r--r-- gcc/config/i386/avx512vnniintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vnnivlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vp2intersectintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vp2intersectvlintrin.h | 5
-rw-r--r-- gcc/config/i386/avx512vpopcntdqintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vpopcntdqvlintrin.h | 5
-rw-r--r-- gcc/config/i386/cygming.h | 14
-rw-r--r-- gcc/config/i386/driver-i386.cc | 34
-rwxr-xr-x gcc/config/i386/gcc-auto-profile | 29
-rw-r--r-- gcc/config/i386/gfniintrin.h | 8
-rw-r--r-- gcc/config/i386/host-mingw32.cc | 32
-rw-r--r-- gcc/config/i386/i386-builtin.def | 1290
-rw-r--r-- gcc/config/i386/i386-builtins.cc | 50
-rw-r--r-- gcc/config/i386/i386-c.cc | 4
-rw-r--r-- gcc/config/i386/i386-expand.cc | 343
-rw-r--r-- gcc/config/i386/i386-features.cc | 628
-rw-r--r-- gcc/config/i386/i386-features.h | 13
-rw-r--r-- gcc/config/i386/i386-isa.def | 2
-rw-r--r-- gcc/config/i386/i386-options.cc | 163
-rw-r--r-- gcc/config/i386/i386-passes.def | 1
-rw-r--r-- gcc/config/i386/i386-protos.h | 11
-rw-r--r-- gcc/config/i386/i386.cc | 1262
-rw-r--r-- gcc/config/i386/i386.h | 56
-rw-r--r-- gcc/config/i386/i386.md | 537
-rw-r--r-- gcc/config/i386/i386.opt | 45
-rw-r--r-- gcc/config/i386/i386.opt.urls | 9
-rw-r--r-- gcc/config/i386/immintrin.h | 10
-rw-r--r-- gcc/config/i386/predicates.md | 35
-rw-r--r-- gcc/config/i386/sse.md | 1383
-rw-r--r-- gcc/config/i386/vaesintrin.h | 4
-rw-r--r-- gcc/config/i386/vpclmulqdqintrin.h | 4
-rw-r--r-- gcc/config/i386/x86-tune-costs.h | 297
-rw-r--r-- gcc/config/i386/x86-tune-sched.cc | 24
-rw-r--r-- gcc/config/i386/x86-tune.def | 9
-rw-r--r-- gcc/config/ia64/ia64.cc | 15
-rw-r--r-- gcc/config/loongarch/loongarch.cc | 4
-rw-r--r-- gcc/config/m32r/m32r.cc | 3
-rw-r--r-- gcc/config/m32r/m32r.md | 6
-rw-r--r-- gcc/config/m68k/m68k.cc | 9
-rw-r--r-- gcc/config/m68k/m68k.md | 12
-rw-r--r-- gcc/config/mcore/mcore.cc | 4
-rw-r--r-- gcc/config/microblaze/microblaze.cc | 7
-rw-r--r-- gcc/config/mingw/mingw32.h | 9
-rw-r--r-- gcc/config/mingw/winnt.cc | 14
-rw-r--r-- gcc/config/mingw/winnt.h | 1
-rw-r--r-- gcc/config/mips/mips.cc | 19
-rw-r--r-- gcc/config/nds32/nds32-intrinsic.md | 11
-rw-r--r-- gcc/config/nvptx/gen-multilib-matches-tests | 67
-rw-r--r-- gcc/config/nvptx/mkoffload.cc | 15
-rw-r--r-- gcc/config/nvptx/nvptx-gen.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx-gen.opt | 3
-rw-r--r-- gcc/config/nvptx/nvptx-opts.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx-sm.def | 1
-rw-r--r-- gcc/config/nvptx/nvptx.cc | 79
-rw-r--r-- gcc/config/nvptx/nvptx.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx.md | 4
-rw-r--r-- gcc/config/nvptx/nvptx.opt | 15
-rw-r--r-- gcc/config/or1k/or1k.cc | 63
-rw-r--r-- gcc/config/or1k/or1k.md | 57
-rw-r--r-- gcc/config/or1k/or1k.opt | 4
-rw-r--r-- gcc/config/or1k/predicates.md | 7
-rw-r--r-- gcc/config/pa/pa-hpux.h | 14
-rw-r--r-- gcc/config/pa/pa.cc | 3
-rw-r--r-- gcc/config/pru/pru.cc | 6
-rw-r--r-- gcc/config/riscv/autovec-opt.md | 119
-rw-r--r-- gcc/config/riscv/autovec.md | 99
-rw-r--r-- gcc/config/riscv/bitmanip.md | 290
-rw-r--r-- gcc/config/riscv/constraints.md | 18
-rw-r--r-- gcc/config/riscv/gen-riscv-ext-opt.cc | 105
-rw-r--r-- gcc/config/riscv/gen-riscv-ext-texi.cc | 88
-rw-r--r-- gcc/config/riscv/generic-vector-ooo.md | 4
-rw-r--r-- gcc/config/riscv/genrvv-type-indexer.cc | 26
-rw-r--r-- gcc/config/riscv/gnu.h | 59
-rw-r--r-- gcc/config/riscv/iterators.md | 3
-rw-r--r-- gcc/config/riscv/mips-p8700.md | 167
-rwxr-xr-x gcc/config/riscv/multilib-generator | 4
-rw-r--r-- gcc/config/riscv/predicates.md | 28
-rw-r--r-- gcc/config/riscv/riscv-c.cc | 37
-rw-r--r-- gcc/config/riscv/riscv-cores.def | 68
-rw-r--r-- gcc/config/riscv/riscv-ext-corev.def | 87
-rw-r--r-- gcc/config/riscv/riscv-ext-sifive.def | 87
-rw-r--r-- gcc/config/riscv/riscv-ext-thead.def | 191
-rw-r--r-- gcc/config/riscv/riscv-ext-ventana.def | 35
-rw-r--r-- gcc/config/riscv/riscv-ext.def | 2084
-rw-r--r-- gcc/config/riscv/riscv-ext.opt | 447
-rw-r--r-- gcc/config/riscv/riscv-ext.opt.urls | 2
-rw-r--r-- gcc/config/riscv/riscv-opts.h | 26
-rw-r--r-- gcc/config/riscv/riscv-passes.def | 2
-rw-r--r-- gcc/config/riscv/riscv-protos.h | 8
-rw-r--r-- gcc/config/riscv/riscv-shorten-memrefs.cc | 3
-rw-r--r-- gcc/config/riscv/riscv-subset.h | 65
-rw-r--r-- gcc/config/riscv/riscv-target-attr.cc | 6
-rw-r--r-- gcc/config/riscv/riscv-v.cc | 941
-rw-r--r-- gcc/config/riscv/riscv-vect-permconst.cc | 318
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-shapes.cc | 52
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-shapes.h | 2
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-types.def | 40
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.cc | 382
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.def | 30
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.h | 44
-rw-r--r-- gcc/config/riscv/riscv-vector-costs.cc | 29
-rw-r--r-- gcc/config/riscv/riscv-vsetvl.cc | 25
-rw-r--r-- gcc/config/riscv/riscv.cc | 1234
-rw-r--r-- gcc/config/riscv/riscv.h | 4
-rw-r--r-- gcc/config/riscv/riscv.md | 191
-rw-r--r-- gcc/config/riscv/riscv.opt | 319
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-bases.cc | 78
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-bases.h | 3
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-functions.def | 45
-rw-r--r-- gcc/config/riscv/sifive-vector.md | 871
-rw-r--r-- gcc/config/riscv/sync.md | 11
-rw-r--r-- gcc/config/riscv/t-riscv | 53
-rw-r--r-- gcc/config/riscv/vector-iterators.md | 58
-rw-r--r-- gcc/config/riscv/vector.md | 38
-rw-r--r-- gcc/config/riscv/zicond.md | 36
-rw-r--r-- gcc/config/rl78/rl78.cc | 3
-rw-r--r-- gcc/config/rs6000/rs6000-builtin.cc | 24
-rw-r--r-- gcc/config/rs6000/rs6000.cc | 50
-rw-r--r-- gcc/config/rx/rx.md | 20
-rw-r--r-- gcc/config/s390/9175.md | 316
-rw-r--r-- gcc/config/s390/driver-native.cc | 4
-rw-r--r-- gcc/config/s390/s390-builtins.def | 8
-rw-r--r-- gcc/config/s390/s390-c.cc | 4
-rw-r--r-- gcc/config/s390/s390-opts.h | 2
-rw-r--r-- gcc/config/s390/s390-protos.h | 1
-rw-r--r-- gcc/config/s390/s390.cc | 173
-rw-r--r-- gcc/config/s390/s390.h | 18
-rw-r--r-- gcc/config/s390/s390.md | 65
-rw-r--r-- gcc/config/s390/s390.opt | 5
-rw-r--r-- gcc/config/s390/vector.md | 169
-rw-r--r-- gcc/config/sh/sh-modes.def | 6
-rw-r--r-- gcc/config/sh/sh_treg_combine.cc | 5
-rw-r--r-- gcc/config/sparc/sparc.cc | 15
-rw-r--r-- gcc/config/sparc/sparc.md | 87
-rw-r--r-- gcc/config/stormy16/stormy16.cc | 3
-rw-r--r-- gcc/config/stormy16/stormy16.md | 3
-rw-r--r-- gcc/config/xtensa/predicates.md | 13
-rw-r--r-- gcc/config/xtensa/xtensa.cc | 133
-rw-r--r-- gcc/config/xtensa/xtensa.h | 1
-rw-r--r-- gcc/config/xtensa/xtensa.md | 149
241 files changed, 19033 insertions, 13776 deletions
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index d1e2ab9..98337b7 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
"__ARM_FEATURE_SME2p1", pfile);
+ aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 7f204fd..24b7cd3 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -132,7 +132,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFI
/* Fujitsu ('F') cores. */
AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
-AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FP8, LS64, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1)
+AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LS64, LUT, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1)
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
@@ -224,7 +224,7 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG
AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
/* NVIDIA ('N') cores. */
-AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
+AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
/* Generic Architecture Processors. */
AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1)
diff --git a/gcc/config/aarch64/aarch64-elf-metadata.cc b/gcc/config/aarch64/aarch64-elf-metadata.cc
new file mode 100644
index 0000000..88fbb93
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-elf-metadata.cc
@@ -0,0 +1,145 @@
+/* ELF metadata for AArch64 architecture.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define INCLUDE_STRING
+#define INCLUDE_ALGORITHM
+#define INCLUDE_MEMORY
+#define INCLUDE_VECTOR
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "output.h"
+
+#include "aarch64-elf-metadata.h"
+
+/* Defined for convenience. */
+#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
+
+namespace aarch64 {
+
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000;
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_BTI = (1U << 0);
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_PAC = (1U << 1);
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_GCS = (1U << 2);
+
+namespace {
+
+std::string
+gnu_property_features_to_string (unsigned feature_1_and)
+{
+ struct flag_name
+ {
+ unsigned int mask;
+ const char *name;
+ };
+
+ static const flag_name flags[] = {
+ {GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI"},
+ {GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC"},
+ {GNU_PROPERTY_AARCH64_FEATURE_1_GCS, "GCS"},
+ };
+
+ const char *separator = "";
+ std::string s_features;
+ for (auto &flag : flags)
+ if (feature_1_and & flag.mask)
+ {
+ s_features.append (separator).append (flag.name);
+ separator = ", ";
+ }
+ return s_features;
+};
+
+} // namespace anonymous
+
+section_note_gnu_property::section_note_gnu_property ()
+ : m_feature_1_and (0) {}
+
+void
+section_note_gnu_property::bti_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+}
+
+void
+section_note_gnu_property::pac_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+}
+
+void
+section_note_gnu_property::gcs_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+}
+
+void
+section_note_gnu_property::write () const
+{
+ if (m_feature_1_and)
+ {
+ /* Generate .note.gnu.property section. */
+ switch_to_section (
+ get_section (".note.gnu.property", SECTION_NOTYPE, NULL));
+
+ /* PT_NOTE header: namesz, descsz, type.
+ namesz = 4 ("GNU\0")
+ descsz = 16 (Size of the program property array)
+ [(12 + padding) * Number of array elements]
+ type = 5 (NT_GNU_PROPERTY_TYPE_0). */
+ assemble_align (POINTER_SIZE);
+ assemble_integer (GEN_INT (4), 4, 32, 1);
+ assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
+ assemble_integer (GEN_INT (5), 4, 32, 1);
+
+ /* PT_NOTE name. */
+ assemble_string ("GNU", 4);
+
+ /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
+ type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ datasz = 4
+ data = feature_1_and. */
+ fputs (integer_asm_op (4, true), asm_out_file);
+ fprint_whex (asm_out_file, GNU_PROPERTY_AARCH64_FEATURE_1_AND);
+ putc ('\n', asm_out_file);
+ assemble_integer (GEN_INT (4), 4, 32, 1);
+
+ fputs (integer_asm_op (4, true), asm_out_file);
+ fprint_whex (asm_out_file, m_feature_1_and);
+ if (flag_debug_asm)
+ {
+ auto const &s_features
+ = gnu_property_features_to_string (m_feature_1_and);
+ asm_fprintf (asm_out_file,
+ "\t%s GNU_PROPERTY_AARCH64_FEATURE_1_AND (%s)\n",
+ ASM_COMMENT_START, s_features.c_str ());
+ }
+ else
+ putc ('\n', asm_out_file);
+
+ /* Pad the size of the note to the required alignment. */
+ assemble_align (POINTER_SIZE);
+ }
+}
+
+} // namespace aarch64
diff --git a/gcc/config/aarch64/aarch64-elf-metadata.h b/gcc/config/aarch64/aarch64-elf-metadata.h
new file mode 100644
index 0000000..e99f6df
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-elf-metadata.h
@@ -0,0 +1,253 @@
+/* ELF metadata for AArch64 architecture.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_ELF_METADATA_H
+#define GCC_AARCH64_ELF_METADATA_H
+
+#include "vec.h"
+
+namespace aarch64 {
+
+class section_note_gnu_property
+{
+ public:
+ section_note_gnu_property ();
+
+ /* Add BTI flag to GNU properties. */
+ void bti_enabled ();
+ /* Add GCS flag to GNU properties. */
+ void gcs_enabled ();
+ /* Add PAC flag to GNU properties. */
+ void pac_enabled ();
+
+ /* Write the data to the assembly file. */
+ void write () const;
+
+ private:
+ unsigned m_feature_1_and;
+};
+
+enum subsection_optionality : uint8_t
+{
+ required = 0x0,
+ optional = 0x1,
+};
+
+enum subsection_val_type : uint8_t
+{
+ uleb128 = 0x0,
+ ntbs = 0x1,
+};
+
+enum BA_TagFeature_t : uint8_t
+{
+ Tag_Feature_BTI = 0,
+ Tag_Feature_PAC = 1,
+ Tag_Feature_GCS = 2,
+};
+
+template <typename T_tag, typename T_val>
+struct aeabi_attribute
+{
+ T_tag tag;
+ T_val value;
+};
+
+template <typename T_tag, typename T_val>
+aeabi_attribute<T_tag, T_val>
+make_aeabi_attribute (T_tag tag, T_val val)
+{
+ return aeabi_attribute<T_tag, T_val>{tag, val};
+}
+
+namespace details {
+
+constexpr const char *
+to_c_str (bool b)
+{
+ return b ? "true" : "false";
+}
+
+constexpr const char *
+to_c_str (const char *s)
+{
+ return s;
+}
+
+constexpr const char *
+to_c_str (subsection_optionality v)
+{
+ return (v == optional ? "optional"
+ : v == required ? "required"
+ : nullptr);
+}
+
+constexpr const char *
+to_c_str (subsection_val_type v)
+{
+ return (v == uleb128 ? "ULEB128"
+ : v == ntbs ? "NTBS"
+ : nullptr);
+}
+
+constexpr const char *
+to_c_str (BA_TagFeature_t feature)
+{
+ return (feature == Tag_Feature_BTI ? "Tag_Feature_BTI"
+ : feature == Tag_Feature_PAC ? "Tag_Feature_PAC"
+ : feature == Tag_Feature_GCS ? "Tag_Feature_GCS"
+ : nullptr);
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr const char *
+aeabi_attr_str_fmt (T)
+{
+ return "\t.aeabi_attribute %s, %u";
+}
+
+constexpr const char *
+aeabi_attr_str_fmt (const char *)
+{
+ return "\t.aeabi_attribute %s, \"%s\"";
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr uint8_t
+aeabi_attr_val_for_fmt (T value)
+{
+ return static_cast<uint8_t>(value);
+}
+
+constexpr const char *
+aeabi_attr_val_for_fmt (const char *s)
+{
+ return s;
+}
+
+template <typename T_tag, typename T_val>
+void
+write (FILE *out_file, aeabi_attribute<T_tag, T_val> const &attr)
+{
+ asm_fprintf (out_file, aeabi_attr_str_fmt (T_val{}),
+ to_c_str (attr.tag), aeabi_attr_val_for_fmt (attr.value));
+ if (flag_debug_asm)
+ asm_fprintf (out_file, "\t%s %s: %s", ASM_COMMENT_START,
+ to_c_str (attr.tag), to_c_str (attr.value));
+ asm_fprintf (out_file, "\n");
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr subsection_val_type
+deduce_attr_av_type (T)
+{
+ return subsection_val_type::uleb128;
+}
+
+constexpr subsection_val_type
+deduce_attr_av_type (const char *)
+{
+ return subsection_val_type::ntbs;
+}
+
+} // namespace details
+
+/* AEABI subsections can be public or private. A subsection is public if it is
+ prefixed with "aeabi", private otherwise. The header of an AEABI subsection
+ is composed of a name (usually a vendor name), an optionality status (optional
+ or required), and the expected type of its associated attributes (ULEB128 or
+ NTBS). Note: The attributes in the same subsection have all the same type.
+ An attribute is composed of a tag identifier (ULEB128), and its value (ULEB128
+ or NTBS).
+
+ Syntax:
+ .aeabi_subsection NameOfTheSubsection: string (=NTBS),
+ Optional: boolean (=ULEB128),
+ AttributeValueType: enum{ULEB128, NTBS} (=ULEB128)
+ [
+ .aeabi_attribute TagIdentifier: ULEB128,
+ TagValue: Variant[ULEB128|NTBS]
+ ]*
+
+ Example:
+ .aeabi_subsection .aeabi-feature-and-bits, optional, ULEB128
+ .aeabi_attribute Tag_Feature_GCS, 1 // Tag_Feature_GCS: true
+
+ Note: The textual representations of the tag and its value are emitted as a
+ comment along their numerical representations to annotate the assembler
+ output when the developer flag '-dA' is provided. */
+template <
+ typename T_tag, /* The type of a tag. */
+ typename T_val, /* The type of a value. */
+ size_t N = 0 /* The number of expected attributes if we know it. */
+>
+class aeabi_subsection
+{
+ public:
+ aeabi_subsection (const char *name, bool optional)
+ : m_name (name),
+ m_optionality (optional
+ ? subsection_optionality::optional
+ : subsection_optionality::required),
+ m_avtype (details::deduce_attr_av_type (T_val{}))
+ {}
+
+ /* Append an attribute to the subsection. */
+ void append (aeabi_attribute<T_tag, T_val> &&attr)
+ {
+ m_attributes.quick_push (std::move (attr));
+ }
+
+ /* Write the data to the assembly file. */
+ void write (FILE *out_file) const
+ {
+ asm_fprintf (out_file, "\n\t.aeabi_subsection %s, %s, %s\n",
+ m_name, details::to_c_str (m_optionality),
+ details::to_c_str (m_avtype));
+
+ for (auto const &attr : m_attributes)
+ details::write (out_file, attr);
+ }
+
+ /* Indicate if the subsection is empty. */
+ bool empty () const
+ {
+ return m_attributes.is_empty ();
+ }
+
+ private:
+ const char *m_name;
+ subsection_optionality m_optionality;
+ subsection_val_type m_avtype;
+ auto_vec<aeabi_attribute<T_tag, T_val>, N> m_attributes;
+};
+
+} // namespace aarch64
+
+#endif /* GCC_AARCH64_ELF_METADATA_H */
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f44aea..31f2f5b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -290,8 +290,8 @@ struct sve_vec_cost : simd_vec_cost
/* The cost of a gather load instruction. The x32 value is for loads
of 32-bit elements and the x64 value is for loads of 64-bit elements. */
- const int gather_load_x32_cost;
- const int gather_load_x64_cost;
+ const unsigned int gather_load_x32_cost;
+ const unsigned int gather_load_x64_cost;
/* Additional loop initialization cost of using a gather load instruction. The x32
value is for loads of 32-bit elements and the x64 value is for loads of
@@ -933,6 +933,7 @@ char *aarch64_output_simd_mov_imm (rtx, unsigned);
char *aarch64_output_simd_orr_imm (rtx, unsigned);
char *aarch64_output_simd_and_imm (rtx, unsigned);
char *aarch64_output_simd_xor_imm (rtx, unsigned);
+char *aarch64_output_fmov (rtx);
char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
@@ -946,8 +947,10 @@ bool aarch64_parallel_select_half_p (machine_mode, rtx);
bool aarch64_pars_overlap_p (rtx, rtx);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
+bool aarch64_sve_valid_pred_p (rtx, machine_mode);
bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
bool aarch64_simd_valid_and_imm (rtx);
+bool aarch64_simd_valid_and_imm_fmov (rtx, unsigned int * = NULL);
bool aarch64_simd_valid_mov_imm (rtx);
bool aarch64_simd_valid_orr_imm (rtx);
bool aarch64_simd_valid_xor_imm (rtx);
@@ -1026,12 +1029,17 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_ptrue_reg (machine_mode, machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
+rtx aarch64_sve_packed_pred (machine_mode);
+rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
+bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
void aarch64_expand_prologue (void);
+void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
void aarch64_expand_vector_init (rtx, rtx);
void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
void aarch64_sve_expand_vector_init (rtx, rtx);
@@ -1053,6 +1061,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
rtx *, rtx *, rtx *);
void aarch64_expand_subvti (rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
+int aarch64_exact_log2_inverse (unsigned int, rtx);
/* Initialize builtins for SIMD intrinsics. */
@@ -1260,7 +1269,9 @@ void aarch64_restore_za (rtx);
void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
+void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
+extern bool aarch64_pacret_enabled ();
extern bool aarch64_gcs_enabled ();
extern unsigned aarch64_data_alignment (const_tree exp, unsigned align);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6cc45b1..685bf0d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -57,7 +57,7 @@
VAR1 (BINOPP, pmull, 0, DEFAULT, v8qi)
VAR1 (BINOPP, pmull_hi, 0, DEFAULT, v16qi)
BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, FP)
- BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
+ BUILTIN_VHSDF_HSDF (UNOP, sqrt, 2, FP)
BUILTIN_VDQ_I (BINOP, addp, 0, DEFAULT)
BUILTIN_VDQ_I (BINOPU, addp, 0, DEFAULT)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, DEFAULT)
@@ -848,9 +848,6 @@
BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0, FP)
BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0, FP)
- /* Implemented by sqrt<mode>2. */
- VAR1 (UNOP, sqrt, 2, FP, hf)
-
/* Implemented by <optab><mode>hf2. */
VAR1 (UNOP, floatdi, 2, FP, hf)
VAR1 (UNOP, floatsi, 2, FP, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e2afe87..e771def 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1117,17 +1117,17 @@
[(set_attr "type" "neon_fp_abd_<stype><q>")]
)
-;; For AND (vector, register) and BIC (vector, immediate)
+;; For AND (vector, register), BIC (vector, immediate) and FMOV (register)
(define_insn "and<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
"TARGET_SIMD"
- {@ [ cons: =0 , 1 , 2 ]
- [ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , Db ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
+ {@ [ cons: =0 , 1 , 2 ; attrs: type ]
+ [ w , w , w ; neon_logic<q> ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ [ w , w , Df ; fmov ] << aarch64_output_fmov (operands[2]);
+ [ w , 0 , Db ; neon_logic<q> ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
}
- [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
@@ -1193,12 +1193,14 @@
(define_insn "aarch64_simd_vec_set_zero<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
- (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
- (match_operand:VALL_F16 3 "register_operand" "0")
+ (match_operand:VALL_F16 1 "register_operand" "0")
+ (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
(match_operand:SI 2 "immediate_operand" "i")))]
- "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
+ "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0"
{
- int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>,
+ aarch64_exact_log2_inverse (<nunits>,
+ operands[2]));
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
}
@@ -1626,6 +1628,24 @@
}
)
+(define_expand "vec_set<mode>"
+ [(match_operand:VSTRUCT_QD 0 "register_operand")
+ (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
+ &operands[2], true);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_set<vstruct_elt> to cope with those cases too. */
+ auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
+ emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
+ DONE;
+})
(define_insn "aarch64_mla<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -8881,6 +8901,26 @@
DONE;
})
+(define_expand "vec_extract<mode><Vel>"
+ [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+ (match_operand:VSTRUCT_QD 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
+ &operands[2], false);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too. */
+ auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
+ emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
[(match_operand:<VHALF> 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index c49affd..f7958c9 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -373,6 +373,8 @@
(reg:DI SME_STATE_REGNUM)
(reg:DI TPIDR2_SETUP_REGNUM)
(reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
+ (set (reg:DI SME_STATE_REGNUM)
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
(clobber (reg:DI R0_REGNUM))
(clobber (reg:DI R14_REGNUM))
(clobber (reg:DI R15_REGNUM))
diff --git a/gcc/config/aarch64/aarch64-speculation.cc b/gcc/config/aarch64/aarch64-speculation.cc
index 1c7be1a..618045a 100644
--- a/gcc/config/aarch64/aarch64-speculation.cc
+++ b/gcc/config/aarch64/aarch64-speculation.cc
@@ -160,9 +160,7 @@ aarch64_speculation_clobber_sp ()
emit_insn (gen_rtx_SET (scratch, sp));
emit_insn (gen_anddi3 (scratch, scratch, tracker));
emit_insn (gen_rtx_SET (sp, scratch));
- rtx_insn *seq = get_insns ();
- end_sequence ();
- return seq;
+ return end_sequence ();
}
/* Generate a code sequence to establish the tracker variable from the
@@ -176,9 +174,7 @@ aarch64_speculation_establish_tracker ()
rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx);
emit_insn (gen_cstoredi_neg (tracker,
gen_rtx_NE (CCmode, cc, const0_rtx), cc));
- rtx_insn *seq = get_insns ();
- end_sequence ();
- return seq;
+ return end_sequence ();
}
/* Main speculation tracking pass. */
@@ -405,8 +401,7 @@ aarch64_do_track_speculation ()
{
start_sequence ();
emit_insn (seq);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
}
for (rtx_insn *list = seq; list; list = NEXT_INSN (list))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 3651926..2b627a9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -47,6 +47,8 @@
#include "langhooks.h"
#include "stringpool.h"
#include "attribs.h"
+#include "value-range.h"
+#include "tree-ssanames.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-sve2.h"
@@ -3664,7 +3666,8 @@ gimple_folder::fold_pfalse ()
/* Convert the lhs and all non-boolean vector-type operands to TYPE.
Pass the converted variables to the callback FP, and finally convert the
result back to the original type. Add the necessary conversion statements.
- Return the new call. */
+ Return the new call. Note that the tree argument to the callback FP can
+ only be set once; it will always be an SSA_NAME. */
gimple *
gimple_folder::convert_and_fold (tree type,
gimple *(*fp) (gimple_folder &,
@@ -3675,7 +3678,7 @@ gimple_folder::convert_and_fold (tree type,
tree old_ty = TREE_TYPE (lhs);
gimple_seq stmts = NULL;
bool convert_lhs_p = !useless_type_conversion_p (type, old_ty);
- tree lhs_conv = convert_lhs_p ? create_tmp_var (type) : lhs;
+ tree lhs_conv = convert_lhs_p ? make_ssa_name (type) : lhs;
unsigned int num_args = gimple_call_num_args (call);
auto_vec<tree, 16> args_conv;
args_conv.safe_grow (num_args);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3dbd659..450975d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -154,8 +154,10 @@
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Truncating conversions
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
+;; ---- [FP<-FP] Extending conversions
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
@@ -702,6 +704,23 @@
}
)
+;; Fold predicated loads/stores with a PTRUE predicate to unpredicated
+;; loads/stores after RA.
+(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str"
+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w")
+ (unspec:SVE_FULL
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_one")
+ (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && reload_completed
+ && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
+ && ((REG_P (operands[0]) && MEM_P (operands[2]))
+ || (REG_P (operands[2]) && MEM_P (operands[0])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable. Memory
;; accesses require secondary reloads.
@@ -1286,7 +1305,24 @@
;; -------------------------------------------------------------------------
;; Predicated LD1 (single).
-(define_insn "maskload<mode><vpred>"
+(define_expand "maskload<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "memory_operand")
+ (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
+ UNSPEC_LD1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated LD1 (single).
+(define_insn "*aarch64_maskload<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -2287,7 +2323,24 @@
;; -------------------------------------------------------------------------
;; Predicated ST1 (single).
-(define_insn "maskstore<mode><vpred>"
+(define_expand "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated ST1 (single).
+(define_insn "*aarch64_maskstore<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -2969,10 +3022,11 @@
{
poly_int64 val;
if (poly_int_rtx_p (operands[2], &val)
- && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
+ && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)
+ && !val.is_constant ())
{
- /* The last element can be extracted with a LASTB and a false
- predicate. */
+ /* For VLA, extract the last element with a LASTB and a false
+ predicate. */
rtx sel = aarch64_pfalse_reg (<VPRED>mode);
emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
DONE;
@@ -3133,9 +3187,9 @@
"TARGET_SVE"
{
rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode),
+ operands[1]));
emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
DONE;
}
@@ -3265,6 +3319,61 @@
;; - REVW
;; -------------------------------------------------------------------------
+(define_split
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
+ (rotate:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand")
+ (match_operand:SVE_FULL_HSDI 2 "aarch64_constant_vector_operand")))]
+ "TARGET_SVE && can_create_pseudo_p ()"
+ [(set (match_dup 3)
+ (ashift:SVE_FULL_HSDI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (plus:SVE_FULL_HSDI
+ (lshiftrt:SVE_FULL_HSDI (match_dup 1)
+ (match_dup 4))
+ (match_dup 3)))]
+ {
+ if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
+ DONE;
+
+ if (!TARGET_SVE2)
+ FAIL;
+
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ HOST_WIDE_INT shift_amount =
+ INTVAL (unwrap_const_vec_duplicate (operands[2]));
+ int bitwidth = GET_MODE_UNIT_BITSIZE (<MODE>mode);
+ operands[4] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ bitwidth - shift_amount);
+ }
+)
+
+;; The RTL combiners are able to combine "ior (ashift, ashiftrt)" to a "bswap".
+;; Match that as well.
+(define_insn_and_split "*v_revvnx8hi"
+ [(parallel
+ [(set (match_operand:VNx8HI 0 "register_operand" "=w")
+ (bswap:VNx8HI (match_operand 1 "register_operand" "w")))
+ (clobber (match_scratch:VNx8BI 2 "=Upl"))])]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VNx8HI
+ [(match_dup 2)
+ (unspec:VNx8HI
+ [(match_dup 1)]
+ UNSPEC_REVB)]
+ UNSPEC_PRED_X))]
+ {
+ if (!can_create_pseudo_p ())
+ emit_move_insn (operands[2], CONSTM1_RTX (VNx8BImode));
+ else
+ operands[2] = aarch64_ptrue_reg (VNx8BImode);
+ }
+)
+
;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
@@ -3841,6 +3950,7 @@
;; -------------------------------------------------------------------------
;; Includes:
;; - NOT
+;; - NOTS
;; -------------------------------------------------------------------------
;; Unpredicated predicate inverse.
@@ -3865,6 +3975,42 @@
"not\t%0.b, %1/z, %2.b"
)
+;; Predicated predicate inverse in which the flags are set in the same
+;; way as a PTEST.
+(define_insn "*one_cmpl<mode>3_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 3)
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (and:PRED_ALL
+ (not:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_dup 3))]
+ UNSPEC_PTEST))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (not:PRED_ALL (match_dup 2)) (match_dup 3)))]
+ "TARGET_SVE"
+ "nots\t%0.b, %1/z, %2.b"
+)
+
+;; Same, where only the flags result is interesting.
+(define_insn "*one_cmpl<mode>3_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 3)
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (and:PRED_ALL
+ (not:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_dup 3))]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+ "TARGET_SVE"
+ "nots\t%0.b, %1/z, %2.b"
+)
+
;; =========================================================================
;; == Binary arithmetic
;; =========================================================================
@@ -3949,8 +4095,8 @@
(match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , <sve_imm_con> ; * ] #
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , <sve_imm_con> ; * ] #
[ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
[ ?&w , Upl , w , <sve_imm_con> ; yes ] #
[ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
@@ -4079,8 +4225,8 @@
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
[ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
[ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
[ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
@@ -4182,80 +4328,57 @@
(define_expand "@aarch64_adr<mode>_shift"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(plus:SVE_FULL_SDI
- (unspec:SVE_FULL_SDI
- [(match_dup 4)
- (ashift:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 2 "register_operand")
- (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 2 "register_operand")
+ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
(match_operand:SVE_FULL_SDI 1 "register_operand")))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
-(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
+(define_insn "*aarch64_adr<mode>_shift"
[(set (match_operand:SVE_24I 0 "register_operand" "=w")
(plus:SVE_24I
- (unspec:SVE_24I
- [(match_operand 4)
- (ashift:SVE_24I
- (match_operand:SVE_24I 2 "register_operand" "w")
- (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:SVE_24I
+ (match_operand:SVE_24I 2 "register_operand" "w")
+ (match_operand:SVE_24I 3 "const_1_to_3_operand"))
(match_operand:SVE_24I 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
- "&& !CONSTANT_P (operands[4])"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; Same, but with the index being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
[(set (match_operand:VNx2DI 0 "register_operand" "=w")
(plus:VNx2DI
- (unspec:VNx2DI
- [(match_operand 4)
- (ashift:VNx2DI
- (unspec:VNx2DI
- [(match_operand 5)
- (sign_extend:VNx2DI
- (truncate:VNx2SI
- (match_operand:VNx2DI 2 "register_operand" "w")))]
- UNSPEC_PRED_X)
- (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:VNx2DI
+ (unspec:VNx2DI
+ [(match_operand 4)
+ (sign_extend:VNx2DI
+ (truncate:VNx2SI
+ (match_operand:VNx2DI 2 "register_operand" "w")))]
+ UNSPEC_PRED_X)
+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))
(match_operand:VNx2DI 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.d, [%1.d, %2.d, sxtw %3]"
- "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+ "&& !CONSTANT_P (operands[4])"
{
- operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
+ operands[4] = CONSTM1_RTX (VNx2BImode);
}
)
;; Same, but with the index being zero-extended from the low 32 bits.
-(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
+(define_insn "*aarch64_adr_shift_uxtw"
[(set (match_operand:VNx2DI 0 "register_operand" "=w")
(plus:VNx2DI
- (unspec:VNx2DI
- [(match_operand 5)
- (ashift:VNx2DI
- (and:VNx2DI
- (match_operand:VNx2DI 2 "register_operand" "w")
- (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
- (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:VNx2DI
+ (and:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w")
+ (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))
(match_operand:VNx2DI 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.d, [%1.d, %2.d, uxtw %3]"
- "&& !CONSTANT_P (operands[5])"
- {
- operands[5] = CONSTM1_RTX (VNx2BImode);
- }
)
;; -------------------------------------------------------------------------
@@ -4298,8 +4421,8 @@
(match_dup 3))]
UNSPEC_PRED_X)))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -4513,8 +4636,8 @@
MUL_HIGHPART)]
UNSPEC_PRED_X))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -4568,8 +4691,8 @@
(match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ]
- [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ]
+ [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
[(set_attr "movprfx" "yes")])
@@ -4713,8 +4836,8 @@
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
[ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
[ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
}
@@ -4847,7 +4970,7 @@
if (CONST_INT_P (operands[2]))
{
amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
- if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+ if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
amount = force_reg (<MODE>mode, amount);
}
else
@@ -4871,15 +4994,40 @@
UNSPEC_PRED_X))]
"TARGET_SVE"
{
+ if (CONSTANT_P (operands[2]))
+ {
+ emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Shift by a vector, predicated with a PTRUE. We don't actually need
-;; the predicate for the first alternative, but using Upa or X isn't
-;; likely to gain much and would make the instruction seem less uniform
-;; to the register allocator.
-(define_insn_and_split "@aarch64_pred_<optab><mode>"
+;; Shift by a vector, predicated with a PTRUE.
+(define_expand "@aarch64_pred_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
+ if (CONSTANT_P (operands[3]))
+ {
+ emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[2],
+ operands[3]));
+ DONE;
+ }
+ }
+)
+
+;; We don't actually need the predicate for the first alternative, but
+;; using Upa or X isn't likely to gain much and would make the instruction
+;; seem less uniform to the register allocator.
+(define_insn_and_split "*aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand")
@@ -4894,33 +5042,32 @@
[ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
- "&& reload_completed
- && !register_operand (operands[3], <MODE>mode)"
+ "&& !register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
""
)
-;; Unpredicated shift operations by a constant (post-RA only).
+;; Unpredicated shift operations by a constant.
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
-(define_insn "*post_ra_v_ashl<mode>3"
+(define_insn "aarch64_vashl<mode>3_const"
[(set (match_operand:SVE_I 0 "register_operand")
(ashift:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
- "TARGET_SVE && reload_completed"
+ "TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ]
[ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
[ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
}
)
-(define_insn "*post_ra_v_<optab><mode>3"
+(define_insn "aarch64_v<optab><mode>3_const"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
(SHIFTRT:SVE_I
(match_operand:SVE_I 1 "register_operand" "w")
(match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
- "TARGET_SVE && reload_completed"
+ "TARGET_SVE"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
@@ -5348,27 +5495,27 @@
;; Split a predicated instruction whose predicate is unused into an
;; unpredicated instruction.
(define_split
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
<SVE_COND_FP>))]
"TARGET_SVE
&& reload_completed
&& INTVAL (operands[4]) == SVE_RELAXED_GP"
[(set (match_dup 0)
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF (match_dup 2) (match_dup 3)))]
+ (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 (match_dup 2) (match_dup 3)))]
)
;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by the split above.
(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF
- (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")
- (match_operand:SVE_FULL_F_BF 2 "register_operand" "w")))]
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16
+ (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand" "w")))]
"TARGET_SVE && reload_completed"
"<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
@@ -5412,12 +5559,12 @@
;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 3)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 1 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_FULL_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY_OPTAB))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -5444,30 +5591,30 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_BF 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
)
;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5483,14 +5630,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5546,14 +5693,14 @@
;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5569,14 +5716,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5589,16 +5736,16 @@
;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5633,16 +5780,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5753,8 +5900,8 @@
(match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
SVE_COND_FP_ADD))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
[ w , Upl , w , w , Z ; * ] #
[ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -6228,8 +6375,8 @@
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
"&& !rtx_equal_p (operands[1], operands[5])"
@@ -6251,8 +6398,8 @@
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -6492,8 +6639,8 @@
(match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
SVE_COND_FP_MUL))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , w , w , Z ; * ] #
[ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
@@ -6506,13 +6653,13 @@
;; Unpredicated multiplication by selected lanes.
(define_insn "@aarch64_mul_lane_<mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
- (mult:SVE_FULL_F_BF
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 2 "register_operand" "<sve_lane_con>")
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
+ (mult:SVE_FULL_F_B16B16
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")))]
+ (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")))]
"TARGET_SVE"
"<b>fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
@@ -6571,10 +6718,10 @@
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")]
+ [(set (match_operand:SVE_F_BF 0 "register_operand" "=w")
+ (unspec:SVE_F_BF
+ [(match_operand:SVE_F_BF 1 "register_operand" "w")
+ (match_operand:SVE_F_BF 2 "register_operand" "w")]
LOGICALF))]
"TARGET_SVE"
"<logicalf_op>\t%0.d, %1.d, %2.d"
@@ -6726,8 +6873,8 @@
(match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
SVE_COND_FP_MAXMIN))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -7035,8 +7182,8 @@
UNSPEC_PRED_X)
(match_operand:SVE_I 4 "register_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7177,8 +7324,8 @@
(match_operand:SVE_I 3 "register_operand"))]
UNSPEC_PRED_X)))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7434,13 +7581,13 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 1 "register_operand")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 1 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7450,17 +7597,17 @@
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx , is_rev ]
- [ w , Upl , %w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
+ [ w , Upl , w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ w , Upl , 0 , w , w ; * , true ] <b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ ?&w , Upl , w , w , w ; yes , * ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7470,17 +7617,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7539,15 +7686,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7563,15 +7710,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7585,17 +7732,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7631,17 +7778,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_FULL_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7672,14 +7819,14 @@
;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
;; (fma ...) since target-independent code won't understand the indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 1 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 2 "register_operand")
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 1 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_FP_TERNARY_LANE))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
@@ -9379,18 +9526,37 @@
;; - FCVTZU
;; -------------------------------------------------------------------------
-;; Unpredicated conversion of floats to integers of the same size (HF to HI,
-;; SF to SI or DF to DI).
-(define_expand "<optab><mode><v_int_equiv>2"
- [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
- (unspec:<V_INT_EQUIV>
+;; Unpredicated conversion of floats to integers of the same size or wider,
+;; excluding conversions from DF (see below).
+(define_expand "<optab><SVE_HSF:mode><SVE_HSDI:mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_HSF 1 "register_operand")]
+ SVE_COND_FCVTI))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]);
+ }
+)
+
+;; SI <- DF can't use SI <- trunc (DI <- DF) without -ffast-math, so this
+;; truncating variant of FCVTZ{S,U} is useful for auto-vectorization.
+;;
+;; DF is the only source mode for which the mask used above doesn't apply,
+;; so we define a separate pattern for it here.
+(define_expand "<optab><VNx2DF_ONLY:mode><SVE_2SDI:mode>2"
+ [(set (match_operand:SVE_2SDI 0 "register_operand")
+ (unspec:SVE_2SDI
[(match_dup 2)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "register_operand")]
+ (match_operand:VNx2DF_ONLY 1 "register_operand")]
SVE_COND_FCVTI))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[2] = aarch64_ptrue_reg (VNx2BImode);
}
)
@@ -9409,18 +9575,37 @@
}
)
-;; Predicated narrowing float-to-integer conversion.
-(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
- [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
- (unspec:VNx4SI_ONLY
+;; As above, for pairs used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_PARTIAL_F 2 "register_operand")]
+ SVE_COND_FCVTI))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ }
+)
+
+;; Predicated narrowing float-to-integer conversion. The VNx2DF->VNx4SI
+;; variant is provided for the ACLE, where the zeroed odd-indexed lanes are
+;; significant. The VNx2DF->VNx2SI variant is provided for auto-vectorization,
+;; where the upper 32 bits of each container are ignored.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><SVE_SI:mode>"
+ [(set (match_operand:SVE_SI 0 "register_operand")
+ (unspec:SVE_SI
[(match_operand:VNx2BI 1 "register_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:VNx2DF_ONLY 2 "register_operand")]
SVE_COND_FCVTI))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
- [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
}
)
@@ -9565,18 +9750,19 @@
;; - UCVTF
;; -------------------------------------------------------------------------
-;; Unpredicated conversion of integers to floats of the same size
-;; (HI to HF, SI to SF or DI to DF).
-(define_expand "<optab><v_int_equiv><mode>2"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+;; Unpredicated conversion of integers to floats of the same size or
+;; narrower.
+(define_expand "<optab><SVE_HSDI:mode><SVE_F:mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (const_int SVE_RELAXED_GP)
- (match_operand:<V_INT_EQUIV> 1 "register_operand")]
+ (match_dup 3)
+ (match_operand:SVE_HSDI 1 "register_operand")]
SVE_COND_ICVTF))]
- "TARGET_SVE"
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_F:self_mask>) == 0"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]);
}
)
@@ -9596,6 +9782,22 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>"
+ [(set (match_operand:SVE_PARTIAL_F 0 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_HSDI 2 "register_operand")]
+ SVE_COND_ICVTF))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ }
+)
+
;; Predicated widening integer-to-float conversion.
(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
[(set (match_operand:VNx2DF_ONLY 0 "register_operand")
@@ -9779,6 +9981,27 @@
}
)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Truncating conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated float-to-float truncation.
+(define_expand "trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>2"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_SDF 1 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]);
+ }
+)
+
;; Predicated float-to-float truncation.
(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
@@ -9794,6 +10017,21 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_SDF 2 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ }
+)
+
;; Predicated float-to-float truncation with merging.
(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
@@ -9936,6 +10174,27 @@
}
)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Extending conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated float-to-float extension.
+(define_expand "extend<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>2"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_PARTIAL_HSF 1 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]);
+ }
+)
+
;; Predicated float-to-float extension.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
@@ -9951,6 +10210,21 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_PARTIAL_HSF 2 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ }
+)
+
;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 3e08e09..62524f3 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -784,8 +784,8 @@
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "register_operand")))]
"TARGET_SVE2p1_OR_SME"
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
}
)
@@ -804,8 +804,8 @@
(match_operand:SVE_FULL_I 3 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE2p1_OR_SME"
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] #
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
}
"&& true"
@@ -1373,8 +1373,8 @@
(match_operand:SVE_CLAMP_F 3 "register_operand")]
UNSPEC_FMINNM))]
""
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
}
)
@@ -1393,8 +1393,8 @@
(match_operand:SVE_CLAMP_F 3 "register_operand")]
UNSPEC_COND_FMINNM))]
""
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] #
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
}
"&& true"
@@ -1626,8 +1626,8 @@
(match_operand:SVE_FULL_I 2 "register_operand")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %0.d
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %0.d
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d
}
"&& !CONSTANT_P (operands[3])"
@@ -1648,8 +1648,8 @@
(match_operand:SVE_FULL_I 2 "register_operand")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %2.d
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %2.d
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d
}
"&& !CONSTANT_P (operands[3])"
@@ -1932,40 +1932,27 @@
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I
- (unspec:SVE_FULL_I
- [(match_dup 4)
- (SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
- UNSPEC_PRED_X)
- (match_operand:SVE_FULL_I 1 "register_operand")))]
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
+ (match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
-(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
+(define_insn "*aarch64_sve2_sra<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I
- (unspec:SVE_FULL_I
- [(match_operand 4)
- (SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
- UNSPEC_PRED_X)
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
(match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , 0 , w ; * ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
}
- "&& !CONSTANT_P (operands[4])"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; SRSRA and URSRA.
@@ -2715,17 +2702,14 @@
;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
(define_insn "*bitmask_shift_plus<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
- (unspec:SVE_FULL_HSDI
- [(match_operand:<VPRED> 1)
- (lshiftrt:SVE_FULL_HSDI
- (plus:SVE_FULL_HSDI
- (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
- (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
- (match_operand:SVE_FULL_HSDI 4
- "aarch64_simd_shift_imm_vec_exact_top" ""))]
- UNSPEC_PRED_X))]
+ (lshiftrt:SVE_FULL_HSDI
+ (plus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
+ (match_operand:SVE_FULL_HSDI 3
+ "aarch64_simd_shift_imm_vec_exact_top" "")))]
"TARGET_SVE2"
- "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
+ "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)
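
As an aside on the simplified pattern above: shifting each element of (a + b)
right by half the element width keeps only the high half of every sum, which is
what ADDHNB writes into the bottom (even) lanes of the narrower destination.
A standalone C sketch of that arithmetic, with illustrative values only (not
GCC code):

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint16_t a = 0x1234, b = 0x0f00;
      uint16_t sum = a + b;            /* 0x2134 */
      uint8_t high_half = sum >> 8;    /* 0x21: what addhnb produces per lane */
      printf ("0x%02x\n", high_half);
      return 0;
    }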
;; -------------------------------------------------------------------------
@@ -2951,8 +2935,8 @@
UNSPEC_COND_FABS)]
SVE_COND_SMAXMIN))]
"TARGET_FAMINMAX && TARGET_SVE2_OR_SME2"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
"&& (!rtx_equal_p (operands[1], operands[5])
diff --git a/gcc/config/aarch64/aarch64-vxworks.h b/gcc/config/aarch64/aarch64-vxworks.h
index 41adada..7b4da93 100644
--- a/gcc/config/aarch64/aarch64-vxworks.h
+++ b/gcc/config/aarch64/aarch64-vxworks.h
@@ -66,9 +66,8 @@ along with GCC; see the file COPYING3. If not see
#define VXWORKS_PERSONALITY "llvm"
/* VxWorks uses R18 as a TCB pointer. We must pick something else as
- the static chain and R18 needs to be claimed "fixed". Until we
- arrange to override the common parts of the port family to
- acknowledge the latter, configure --with-specs="-ffixed-r18". */
+ the static chain and R18 needs to be claimed "fixed" (TARGET_OS_USES_R18
+ does that in aarch64_conditional_register_usage). */
#undef STATIC_CHAIN_REGNUM
#define STATIC_CHAIN_REGNUM 9
-
+#define TARGET_OS_USES_R18
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4e80114..af8415c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -83,6 +83,7 @@
#include "rtlanal.h"
#include "tree-dfa.h"
#include "asan.h"
+#include "aarch64-elf-metadata.h"
#include "aarch64-feature-deps.h"
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
@@ -108,6 +109,10 @@
and 1 MOVI/DUP (same size as a call). */
#define MAX_SET_SIZE(speed) (speed ? 256 : 96)
+#ifndef HAVE_AS_AEABI_BUILD_ATTRIBUTES
+#define HAVE_AS_AEABI_BUILD_ATTRIBUTES 0
+#endif
+
/* Flags that describe how a function shares certain architectural state
with its callers.
@@ -3201,8 +3206,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
else
aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, tmp_reg, result, imm);
@@ -3667,6 +3671,14 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
if (builder.nelts_per_pattern () == 3)
return 0;
+ /* It is conservatively correct to drop the element size to a lower value,
+ and we must do so if the predicate consists of a leading "foreground"
+ sequence that is smaller than the element size. Without this,
+ we would test only one bit and so treat everything as either an
+ all-true or an all-false predicate. */
+ if (builder.nelts_per_pattern () == 2)
+ elt_size = MIN (elt_size, builder.npatterns ());
+
/* Skip over leading set bits. */
unsigned int nelts = builder.encoded_nelts ();
unsigned int i = 0;
@@ -3698,6 +3710,24 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
return vl;
}
+/* Return:
+
+ * -1 if all bits of PRED are set
+ * N if PRED has N leading set bits followed by all clear bits
+ * 0 if PRED does not have any of these forms. */
+
+int
+aarch64_partial_ptrue_length (rtx pred)
+{
+ rtx_vector_builder builder;
+ if (!aarch64_get_sve_pred_bits (builder, pred))
+ return 0;
+
+ auto elt_size = vector_element_size (GET_MODE_BITSIZE (GET_MODE (pred)),
+ GET_MODE_NUNITS (GET_MODE (pred)));
+ return aarch64_partial_ptrue_length (builder, elt_size);
+}
+
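
To make the return-value contract above concrete, here is a standalone sketch
(not GCC code) that applies the same classification to a plain bitmask; it
ignores the element-size and pattern-encoding handling done by the real helper:

    #include <stdint.h>
    #include <stdio.h>

    /* -1 if all NBITS bits are set, N for N leading (lowest-indexed) set bits
       followed by clear bits, 0 otherwise.  */
    static int
    partial_ptrue_length_sketch (uint64_t bits, unsigned int nbits)
    {
      uint64_t mask = nbits == 64 ? ~UINT64_C (0) : (UINT64_C (1) << nbits) - 1;
      bits &= mask;
      if (bits == mask)
        return -1;
      uint64_t plus1 = bits + 1;
      if ((plus1 & (plus1 - 1)) == 0)    /* bits looks like 0...01...1 */
        return __builtin_ctzll (plus1);
      return 0;
    }

    int
    main (void)
    {
      printf ("%d\n", partial_ptrue_length_sketch (0xff, 8));   /* -1 */
      printf ("%d\n", partial_ptrue_length_sketch (0x0f, 8));   /* 4 */
      printf ("%d\n", partial_ptrue_length_sketch (0x05, 8));   /* 0 */
      return 0;
    }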
/* See if there is an svpattern that encodes an SVE predicate of mode
PRED_MODE in which the first VL bits are set and the rest are clear.
Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
@@ -3830,6 +3860,44 @@ aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
}
+
+/* Generate a predicate to control partial SVE mode DATA_MODE as if it
+ were fully packed, enabling the defined elements only. */
+rtx
+aarch64_sve_packed_pred (machine_mode data_mode)
+{
+ unsigned int container_bytes
+ = aarch64_sve_container_bits (data_mode) / BITS_PER_UNIT;
+ /* Enable the significand of each container only. */
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (container_bytes));
+ /* Predicate at the element size. */
+ machine_mode pmode
+ = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (data_mode)).require ();
+ return gen_lowpart (pmode, ptrue);
+}
+
+/* Generate a predicate and strictness value to govern a floating-point
+ operation with SVE mode DATA_MODE.
+
+ If DATA_MODE is a partial vector mode, this pair prevents the operation
+ from interpreting undefined elements - unless we don't need to suppress
+ their trapping behavior. */
+rtx
+aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ if (strictness)
+ *strictness = gen_int_mode (SVE_STRICT_GP, SImode);
+ return aarch64_sve_packed_pred (data_mode);
+ }
+ if (strictness)
+ *strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
+ /* Use the VPRED mode. */
+ return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
+}
+
/* Emit a comparison CMP between OP0 and OP1, both of which have mode
DATA_MODE, and return the result in a predicate of mode PRED_MODE.
Use TARGET as the target register if nonnull and convenient. */
@@ -6410,19 +6478,51 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
return gen_rtx_MEM (mode, force_reg (Pmode, addr));
}
-/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
- that is known to contain PTRUE. */
+/* Emit a load/store from a subreg of SRC to a subreg of DEST.
+ The subregs have mode NEW_MODE. Use only for reg<->mem moves. */
+void
+aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
+{
+ gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
+ || (MEM_P (src) && register_operand (dest, VOIDmode)));
+ auto mode = GET_MODE (dest);
+ auto int_mode = aarch64_sve_int_mode (mode);
+ if (MEM_P (src))
+ {
+ rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
+ tmp = force_lowpart_subreg (int_mode, tmp, new_mode);
+ emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode));
+ }
+ else
+ {
+ src = force_lowpart_subreg (int_mode, src, mode);
+ emit_move_insn (adjust_address (dest, new_mode, 0),
+ force_lowpart_subreg (new_mode, src, int_mode));
+ }
+}
+
+/* PRED is a predicate that is known to contain PTRUE.
+ For 128-bit VLS loads/stores, emit LDR/STR.
+ Else, emit an SVE predicated move from SRC to DEST. */
void
aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
{
- expand_operand ops[3];
machine_mode mode = GET_MODE (dest);
- create_output_operand (&ops[0], dest, mode);
- create_input_operand (&ops[1], pred, GET_MODE(pred));
- create_input_operand (&ops[2], src, mode);
- temporary_volatile_ok v (true);
- expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
+ if ((MEM_P (dest) || MEM_P (src))
+ && known_eq (GET_MODE_SIZE (mode), 16)
+ && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
+ && !BYTES_BIG_ENDIAN)
+ aarch64_emit_load_store_through_mode (dest, src, V16QImode);
+ else
+ {
+ expand_operand ops[3];
+ create_output_operand (&ops[0], dest, mode);
+ create_input_operand (&ops[1], pred, GET_MODE(pred));
+ create_input_operand (&ops[2], src, mode);
+ temporary_volatile_ok v (true);
+ expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
+ }
}
/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
@@ -8699,6 +8799,13 @@ aarch_bti_j_insn_p (rtx_insn *insn)
return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
}
+/* Return TRUE if Pointer Authentication for the return address is enabled. */
+bool
+aarch64_pacret_enabled (void)
+{
+ return (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
+}
+
/* Return TRUE if Guarded Control Stack is enabled. */
bool
aarch64_gcs_enabled (void)
@@ -9417,13 +9524,16 @@ aarch64_emit_stack_tie (rtx reg)
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
- registers. If POLY_SIZE is not large enough to require a probe this function
- will only adjust the stack. When allocating the stack space
- FRAME_RELATED_P is then used to indicate if the allocation is frame related.
- FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
- the saved registers. If we are then we ensure that any allocation
- larger than the ABI defined buffer needs a probe so that the
- invariant of having a 1KB buffer is maintained.
+ registers, given that the stack pointer is currently BYTES_BELOW_SP bytes
+ above the bottom of the static frame.
+
+ If POLY_SIZE is not large enough to require a probe this function will only
+ adjust the stack. When allocating the stack space FRAME_RELATED_P is then
+ used to indicate if the allocation is frame related. FINAL_ADJUSTMENT_P
+ indicates whether we are allocating the area below the saved registers.
+ If we are then we ensure that any allocation larger than the ABI defined
+ buffer needs a probe so that the invariant of having a 1KB buffer is
+ maintained.
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9440,6 +9550,7 @@ aarch64_emit_stack_tie (rtx reg)
static void
aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_int64 poly_size,
+ poly_int64 bytes_below_sp,
aarch64_isa_mode force_isa_mode,
bool frame_related_p,
bool final_adjustment_p)
@@ -9503,8 +9614,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_size, temp1, temp2, force_isa_mode,
false, true);
- rtx_insn *insn = get_last_insn ();
-
+ auto initial_cfa_offset = frame.frame_size - bytes_below_sp;
+ auto final_cfa_offset = initial_cfa_offset + poly_size;
if (frame_related_p)
{
/* This is done to provide unwinding information for the stack
@@ -9514,28 +9625,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
The tie will expand to nothing but the optimizers will not touch
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
- emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+ auto *insn = emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
aarch64_emit_stack_tie (stack_ptr_copy);
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
- add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_ptr_copy,
+ initial_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
rtx guard_const = gen_int_mode (guard_size, Pmode);
- insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
- stack_pointer_rtx, temp1,
- probe_const, guard_const));
+ auto *insn
+ = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
+ stack_pointer_rtx, temp1,
+ probe_const, guard_const));
/* Now reset the CFA register if needed. */
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- gen_int_mode (poly_size, Pmode)));
+ plus_constant (Pmode, stack_pointer_rtx,
+ final_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9581,12 +9695,13 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
We can determine which allocation we are doing by looking at
the value of FRAME_RELATED_P since the final allocations are not
frame related. */
+ auto cfa_offset = frame.frame_size - (bytes_below_sp - rounded_size);
if (frame_related_p)
{
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1, rounded_size));
+ plus_constant (Pmode, temp1, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9608,7 +9723,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, stack_pointer_rtx, rounded_size));
+ plus_constant (Pmode, stack_pointer_rtx, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9916,17 +10031,22 @@ aarch64_expand_prologue (void)
code below does not handle it for -fstack-clash-protection. */
gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
+ /* The offset of the current SP from the bottom of the static frame. */
+ poly_int64 bytes_below_sp = frame_size;
+
/* Will only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
- force_isa_mode, true, false);
+ bytes_below_sp, force_isa_mode,
+ true, false);
+ bytes_below_sp -= initial_adjust;
if (callee_adjust != 0)
- aarch64_push_regs (reg1, reg2, callee_adjust);
-
- /* The offset of the current SP from the bottom of the static frame. */
- poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ {
+ aarch64_push_regs (reg1, reg2, callee_adjust);
+ bytes_below_sp -= callee_adjust;
+ }
if (emit_frame_chain)
{
@@ -9994,7 +10114,7 @@ aarch64_expand_prologue (void)
|| known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
sve_callee_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, false);
bytes_below_sp -= sve_callee_adjust;
}
@@ -10005,10 +10125,11 @@ aarch64_expand_prologue (void)
/* We may need to probe the final adjustment if it is larger than the guard
that is assumed by the called. */
- gcc_assert (known_eq (bytes_below_sp, final_adjust));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, true);
+ bytes_below_sp -= final_adjust;
+ gcc_assert (known_eq (bytes_below_sp, 0));
if (emit_frame_chain && maybe_ne (final_adjust, 0))
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
@@ -14507,6 +14628,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
we don't need to consider that here. */
if (x == const0_rtx)
*cost = 0;
+ /* If the outer is a COMPARE which is used by the middle-end
+ and the constant fits within the range that the cmp instruction allows,
+ say the cost is the same as 1 insn. */
+ else if (outer == COMPARE
+ && (aarch64_uimm12_shift (INTVAL (x))
+ || aarch64_uimm12_shift (-UINTVAL (x))))
+ *cost = COSTS_N_INSNS (1);
else
{
/* To an approximation, building any other constant is
@@ -18748,9 +18876,16 @@ aarch64_override_options_internal (struct gcc_options *opts)
aarch64_stack_protector_guard_offset = offs;
}
- if ((flag_sanitize & SANITIZE_SHADOW_CALL_STACK)
- && !fixed_regs[R18_REGNUM])
- error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+ if ((flag_sanitize & SANITIZE_SHADOW_CALL_STACK))
+ {
+ if (!fixed_regs[R18_REGNUM])
+ error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+#ifdef TARGET_OS_USES_R18
+ else
+ sorry ("%<-fsanitize=shadow-call-stack%> conflicts with the use of"
+ " register x18 by the target operating system");
+#endif
+ }
aarch64_feature_flags isa_flags = aarch64_get_isa_flags (opts);
if ((isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
@@ -20777,7 +20912,6 @@ aarch64_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -20794,37 +20928,16 @@ aarch64_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- if (get_feature_mask_for_version
- (default_version_info->this_node->decl) == 0ULL)
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
if (targetm.has_ifunc_p ())
{
struct cgraph_function_version_info *it_v = NULL;
@@ -21968,6 +22081,14 @@ aarch64_conditional_register_usage (void)
fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
}
+
+#ifdef TARGET_OS_USES_R18
+ /* R18 is the STATIC_CHAIN_REGNUM on most aarch64 ports, but VxWorks
+ uses it as the TCB, so aarch64-vxworks.h overrides
+ STATIC_CHAIN_REGNUM, and here we mark R18 as fixed. */
+ fixed_regs[R18_REGNUM] = 1;
+ call_used_regs[R18_REGNUM] = 1;
+#endif
}
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. */
@@ -23495,6 +23616,39 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
return false;
}
+/* Try to optimize the expansion of a maskload or maskstore with
+ the operands in OPERANDS, given that the vector being loaded or
+ stored has mode MODE. Return true on success or false if the normal
+ expansion should be used. */
+
+bool
+aarch64_expand_maskloadstore (rtx *operands, machine_mode mode)
+{
+ /* If the predicate in operands[2] is a patterned SVE PTRUE predicate
+ with patterns VL1, VL2, VL4, VL8, or VL16 and at most the bottom
+ 128 bits are loaded/stored, emit an ASIMD load/store. */
+ int vl = aarch64_partial_ptrue_length (operands[2]);
+ int width = vl * GET_MODE_UNIT_BITSIZE (mode);
+ if (width <= 128
+ && pow2p_hwi (vl)
+ && (vl == 1
+ || (!BYTES_BIG_ENDIAN
+ && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA)))
+ {
+ machine_mode new_mode;
+ if (known_eq (width, 128))
+ new_mode = V16QImode;
+ else if (known_eq (width, 64))
+ new_mode = V8QImode;
+ else
+ new_mode = int_mode_for_size (width, 0).require ();
+ aarch64_emit_load_store_through_mode (operands[0], operands[1],
+ new_mode);
+ return true;
+ }
+ return false;
+}
+
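
For illustration of the width calculation above, a standalone sketch with
made-up numbers (not GCC code): a leading-VL2 predicate over 32-bit elements
covers 64 bits, so the masked access can become a plain 64-bit Advanced SIMD
load/store:

    #include <stdio.h>

    int
    main (void)
    {
      int vl = 2;                   /* leading active lanes in the predicate */
      int unit_bits = 32;           /* element size of the SVE mode */
      int width = vl * unit_bits;   /* 64 bits actually transferred */
      int pow2 = vl > 0 && (vl & (vl - 1)) == 0;
      if (width <= 128 && pow2)
        printf ("emit a %d-bit Advanced SIMD access\n", width);
      return 0;
    }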
/* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */
bool
aarch64_simd_valid_mov_imm (rtx op)
@@ -23516,6 +23670,36 @@ aarch64_simd_valid_and_imm (rtx op)
return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
}
+/* Return true if OP is a valid SIMD and immediate which allows the and to be
+ optimized as fmov. If ELT_BITSIZE is nonnull, use it to return the number of
+ bits to move. */
+bool
+aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_bitsize)
+{
+ machine_mode mode = GET_MODE (op);
+ gcc_assert (!aarch64_sve_mode_p (mode));
+
+ auto_vec<target_unit, 16> buffer;
+ unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant ();
+ buffer.reserve (n_bytes);
+
+ bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes);
+ gcc_assert (ok);
+
+ auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes * BITS_PER_UNIT);
+ int set_bit = wi::exact_log2 (mask + 1);
+ if ((set_bit == 16 && TARGET_SIMD_F16INST)
+ || set_bit == 32
+ || set_bit == 64)
+ {
+ if (elt_bitsize)
+ *elt_bitsize = set_bit;
+ return true;
+ }
+
+ return false;
+}
+
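
A standalone sketch of the immediate test above (illustrative mask, not GCC
code): reading the whole AND mask as one integer, it must be a run of 16, 32
or 64 low ones; keeping only the low 32 bits of a register is, for example,
what a 32-bit fmov between vector registers does anyway:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint64_t mask = 0x00000000ffffffffULL;   /* keep the low 32 bits */
      uint64_t plus1 = mask + 1;
      int set_bit = (plus1 != 0 && (plus1 & (plus1 - 1)) == 0)
                    ? __builtin_ctzll (plus1) : -1;
      if (set_bit == 16 || set_bit == 32 || set_bit == 64)
        printf ("AND acts like a %d-bit fmov\n", set_bit);   /* prints 32 */
      return 0;
    }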
/* Return true if OP is a valid SIMD xor immediate for SVE. */
bool
aarch64_simd_valid_xor_imm (rtx op)
@@ -23551,6 +23735,19 @@ aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
return IN_RANGE (INTVAL (x), 1, bit_width);
}
+
+/* Check whether X can control SVE mode MODE. */
+bool
+aarch64_sve_valid_pred_p (rtx x, machine_mode mode)
+{
+ machine_mode pred_mode = GET_MODE (x);
+ if (!aarch64_sve_pred_mode_p (pred_mode))
+ return false;
+
+ return known_ge (GET_MODE_NUNITS (pred_mode),
+ GET_MODE_NUNITS (mode));
+}
+
/* Return the bitmask CONST_INT to select the bits required by a zero extract
operation of width WIDTH at bit position POS. */
@@ -23809,6 +24006,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands,
return true;
}
+/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest
+ NELTS bits, if that masked inverse is a power of 2. Otherwise, return -1. */
+
+int
+aarch64_exact_log2_inverse (unsigned int nelts, rtx op)
+{
+ return exact_log2 ((~INTVAL (op))
+ & ((HOST_WIDE_INT_1U << nelts) - 1));
+}
+
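
A standalone worked example of the helper above (illustrative values only, not
GCC code): with NELTS = 4 and OP = 0b1011, the inverse masked to 4 bits is
0b0100, so the result is 2; if the masked inverse had more than one bit set,
the result would be -1:

    #include <stdio.h>

    static int
    exact_log2_inverse_sketch (unsigned int nelts, unsigned long long op)
    {
      unsigned long long inv = ~op & ((1ULL << nelts) - 1);
      return (inv != 0 && (inv & (inv - 1)) == 0) ? __builtin_ctzll (inv) : -1;
    }

    int
    main (void)
    {
      printf ("%d\n", exact_log2_inverse_sketch (4, 0xb));   /* 2 */
      printf ("%d\n", exact_log2_inverse_sketch (4, 0x9));   /* -1: inverse is 0b0110 */
      return 0;
    }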
/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
HIGH (exclusive). */
void
@@ -24514,6 +24721,28 @@ seq_cost_ignoring_scalar_moves (const rtx_insn *seq, bool speed)
return cost;
}
+/* *VECTOR is an Advanced SIMD structure mode and *INDEX is a constant index
+ into it. Narrow *VECTOR and *INDEX so that they reference a single vector
+ of mode SUBVEC_MODE. IS_DEST is true if *VECTOR is a destination operand,
+ false if it is a source operand. */
+
+void
+aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
+ rtx *vector, rtx *index, bool is_dest)
+{
+ auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
+ auto subvec = UINTVAL (*index) / elts_per_vector;
+ auto subelt = UINTVAL (*index) % elts_per_vector;
+ auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
+ if (is_dest)
+ *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ else
+ *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ *index = gen_int_mode (subelt, SImode);
+}
+
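
The index arithmetic above in a standalone sketch (hypothetical V4SI
subvectors, not GCC code): lane 6 of a structure of V4SI vectors is subvector
1, element 2, at byte offset 16 within the structure:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned elts_per_vector = 4;    /* elements per V4SI subvector */
      unsigned subvec_size = 16;       /* bytes per V4SI subvector */
      unsigned index = 6;
      unsigned subvec = index / elts_per_vector;     /* 1 */
      unsigned subelt = index % elts_per_vector;     /* 2 */
      unsigned subvec_byte = subvec * subvec_size;   /* 16 */
      printf ("subvector %u, element %u, byte offset %u\n",
              subvec, subelt, subvec_byte);
      return 0;
    }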
/* Expand a vector initialization sequence, such that TARGET is
initialized to contain VALS. */
@@ -24547,8 +24776,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
rtx tmp_reg = gen_reg_rtx (GET_MODE (new_vals));
aarch64_expand_vector_init (tmp_reg, new_vals);
halves[i] = gen_rtx_SUBREG (mode, tmp_reg, 0);
- rtx_insn *rec_seq = get_insns ();
- end_sequence ();
+ rtx_insn *rec_seq = end_sequence ();
costs[i] = seq_cost_ignoring_scalar_moves (rec_seq, !optimize_size);
emit_insn (rec_seq);
}
@@ -24560,8 +24788,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
= (!optimize_size) ? std::max (costs[0], costs[1]) : costs[0] + costs[1];
seq_total_cost += insn_cost (zip1_insn, !optimize_size);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
start_sequence ();
aarch64_expand_vector_init_fallback (target, vals);
@@ -25191,7 +25418,6 @@ aarch64_start_file (void)
}
/* Emit load exclusive. */
-
static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
rtx mem, rtx model_rtx)
@@ -25642,6 +25868,26 @@ aarch64_float_const_representable_p (rtx x)
return aarch64_real_float_const_representable_p (r);
}
+/* Returns the string with the fmov instruction which is equivalent to an and
+ instruction with the SIMD immediate CONST_VECTOR. */
+char*
+aarch64_output_fmov (rtx const_vector)
+{
+ bool is_valid;
+ static char templ[40];
+ char element_char;
+ unsigned int elt_bitsize;
+
+ is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_bitsize);
+ gcc_assert (is_valid);
+
+ element_char = sizetochar (elt_bitsize);
+ snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1", element_char,
+ element_char);
+
+ return templ;
+}
+
/* Returns the string with the instruction for the SIMD immediate
* CONST_VECTOR of MODE and WIDTH. WHICH selects a move, and(bic) or orr. */
char*
@@ -26191,6 +26437,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
newd.testing_p = d->testing_p;
newd.one_vector_p = d->one_vector_p;
+ newd.zero_op0_p = d->zero_op0_p;
+ newd.zero_op1_p = d->zero_op1_p;
newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2,
newpermindices.nelts_per_input ());
@@ -26774,6 +27022,40 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
return true;
}
+/* Recognize patterns suitable for the AND instructions. */
+static bool
+aarch64_evpc_and (struct expand_vec_perm_d *d)
+{
+ /* Either d->op0 or d->op1 should be a vector of all zeros. */
+ if (d->one_vector_p || (!d->zero_op0_p && !d->zero_op1_p))
+ return false;
+
+ machine_mode mode = d->vmode;
+ machine_mode sel_mode;
+ if (!related_int_vector_mode (mode).exists (&sel_mode))
+ return false;
+
+ insn_code and_code = optab_handler (and_optab, sel_mode);
+ rtx and_mask = vec_perm_and_mask (sel_mode, d->perm, d->zero_op0_p);
+ if (and_code == CODE_FOR_nothing || !and_mask)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ class expand_operand ops[3];
+ rtx in = d->zero_op0_p ? d->op1 : d->op0;
+ create_output_operand (&ops[0], gen_lowpart (sel_mode, d->target), sel_mode);
+ create_input_operand (&ops[1], gen_lowpart (sel_mode, in), sel_mode);
+ create_input_operand (&ops[2], and_mask, sel_mode);
+ expand_insn (and_code, 3, ops);
+ rtx result = gen_lowpart (mode, ops[0].value);
+ if (!rtx_equal_p (d->target, result))
+ emit_move_insn (d->target, result);
+
+ return true;
+}
+
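
To illustrate the recognition above, a standalone sketch with made-up lane
values (not GCC code): when one permute input is all zeros and every surviving
lane keeps its own position, the permute is equivalent to an AND with a lane
mask:

    #include <stdio.h>

    int
    main (void)
    {
      int op0[4] = { 10, 11, 12, 13 };
      int zeros[4] = { 0, 0, 0, 0 };
      int sel[4] = { 0, 5, 2, 7 };     /* lanes 1 and 3 come from the zero input */
      int mask[4] = { -1, 0, -1, 0 };  /* equivalent AND mask */
      for (int i = 0; i < 4; i++)
        {
          int permuted = sel[i] < 4 ? op0[sel[i]] : zeros[sel[i] - 4];
          int anded = op0[i] & mask[i];
          printf ("%d %d%s", permuted, anded, i == 3 ? "\n" : "  ");
        }
      return 0;
    }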
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
@@ -26809,6 +27091,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
else if (aarch64_evpc_uzp (d))
return true;
+ else if (aarch64_evpc_and (d))
+ return true;
else if (aarch64_evpc_trn (d))
return true;
else if (aarch64_evpc_sel (d))
@@ -26869,11 +27153,17 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
d.op_mode = op_mode;
d.op_vec_flags = aarch64_classify_vector_mode (d.op_mode);
d.target = target;
- d.op0 = op0 ? force_reg (op_mode, op0) : NULL_RTX;
+ d.op0 = op0;
+ if (d.op0 && !register_operand (d.op0, op_mode))
+ d.op0 = force_reg (op_mode, d.op0);
if (op0 && d.one_vector_p)
d.op1 = copy_rtx (d.op0);
else
- d.op1 = op1 ? force_reg (op_mode, op1) : NULL_RTX;
+ {
+ d.op1 = op1;
+ if (d.op1 && !register_operand (d.op1, op_mode))
+ d.op1 = force_reg (op_mode, d.op1);
+ }
d.testing_p = !target;
if (!d.testing_p)
@@ -27688,8 +27978,7 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
create_fixed_operand (&ops[0], op0);
create_fixed_operand (&ops[1], op1);
@@ -27700,8 +27989,7 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee (code, cc_mode,
gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
@@ -27765,8 +28053,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
end_sequence ();
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
target = gen_rtx_REG (cc_mode, CC_REGNUM);
aarch64_cond = aarch64_get_condition_code_1 (cc_mode, cmp_code);
@@ -27805,8 +28092,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee (cmp_code, VOIDmode, target, const0_rtx);
}
@@ -29762,60 +30048,43 @@ aarch64_can_tag_addresses ()
/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
section at the end if needed. */
-#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
-#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
-#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
-#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
void
aarch64_file_end_indicate_exec_stack ()
{
file_end_indicate_exec_stack ();
- unsigned feature_1_and = 0;
- if (aarch_bti_enabled ())
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
-
- if (aarch_ra_sign_scope != AARCH_FUNCTION_NONE)
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+ /* Check whether the current assembler supports AEABI build attributes; if
+ not, fall back to the .note.gnu.property section. */
+ if (HAVE_AS_AEABI_BUILD_ATTRIBUTES)
+ {
+ using namespace aarch64;
+ aeabi_subsection<BA_TagFeature_t, bool, 3>
+ aeabi_subsec ("aeabi_feature_and_bits", true);
- if (aarch64_gcs_enabled ())
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_BTI, aarch_bti_enabled ()));
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_PAC, aarch64_pacret_enabled ()));
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_GCS, aarch64_gcs_enabled ()));
- if (feature_1_and)
+ if (!aeabi_subsec.empty ())
+ aeabi_subsec.write (asm_out_file);
+ }
+ else
{
- /* Generate .note.gnu.property section. */
- switch_to_section (get_section (".note.gnu.property",
- SECTION_NOTYPE, NULL));
+ aarch64::section_note_gnu_property gnu_properties;
- /* PT_NOTE header: namesz, descsz, type.
- namesz = 4 ("GNU\0")
- descsz = 16 (Size of the program property array)
- [(12 + padding) * Number of array elements]
- type = 5 (NT_GNU_PROPERTY_TYPE_0). */
- assemble_align (POINTER_SIZE);
- assemble_integer (GEN_INT (4), 4, 32, 1);
- assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
- assemble_integer (GEN_INT (5), 4, 32, 1);
-
- /* PT_NOTE name. */
- assemble_string ("GNU", 4);
-
- /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
- type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
- datasz = 4
- data = feature_1_and. */
- assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
- assemble_integer (GEN_INT (4), 4, 32, 1);
- assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
-
- /* Pad the size of the note to the required alignment. */
- assemble_align (POINTER_SIZE);
+ if (aarch_bti_enabled ())
+ gnu_properties.bti_enabled ();
+ if (aarch64_pacret_enabled ())
+ gnu_properties.pac_enabled ();
+ if (aarch64_gcs_enabled ())
+ gnu_properties.gcs_enabled ();
+
+ gnu_properties.write ();
}
}
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_GCS
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
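[Editor's aside: the fallback branch still emits the same NT_GNU_PROPERTY_TYPE_0 note whose layout the deleted open-coded version documented. A self-contained C++ sketch of that on-disk layout for an LP64 target, with the field values taken from the removed comments above; the struct is illustrative only, not GCC code.]

  #include <cstdint>
  #include <cstdio>

  // Rough layout of the .note.gnu.property section written by the fallback
  // path (LP64, so the single 12-byte property is padded to 16 bytes).
  struct gnu_property_note
  {
    uint32_t namesz = 4;            // "GNU\0"
    uint32_t descsz = 16;           // one 12-byte property, padded to 16
    uint32_t type = 5;              // NT_GNU_PROPERTY_TYPE_0
    char     name[4] = {'G', 'N', 'U', '\0'};
    uint32_t pr_type = 0xc0000000;  // GNU_PROPERTY_AARCH64_FEATURE_1_AND
    uint32_t pr_datasz = 4;
    uint32_t feature_1_and = 0;     // BTI (bit 0) | PAC (bit 1) | GCS (bit 2)
    uint32_t padding = 0;           // align the note to POINTER_SIZE
  };

  int main ()
  {
    static_assert (sizeof (gnu_property_note) == 32, "32-byte note");
    std::printf ("note size: %zu\n", sizeof (gnu_property_note));
  }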
/* Helper function for straight line speculation.
Return what barrier should be emitted for straight line speculation
@@ -30391,8 +30660,7 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
aarch64_local_sme_state (prev_mode));
break;
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
/* Get the set of clobbered registers that are currently live. */
HARD_REG_SET clobbers = {};
@@ -30802,8 +31070,7 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
emit_insn (REGNO (x) == ZA_REGNUM
? gen_aarch64_asm_update_za (id_rtx)
: gen_aarch64_asm_update_zt0 (id_rtx));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
auto mode = REGNO (x) == ZA_REGNUM ? VNx16QImode : V8DImode;
uses.safe_push (gen_rtx_REG (mode, REGNO (x)));
@@ -30838,8 +31105,7 @@ aarch64_switch_pstate_sm_for_landing_pad (basic_block bb)
args_switch.emit_epilogue ();
if (guard_label)
emit_label (guard_label);
- auto seq = get_insns ();
- end_sequence ();
+ auto seq = end_sequence ();
emit_insn_after (seq, bb_note (bb));
return true;
@@ -30862,8 +31128,7 @@ aarch64_switch_pstate_sm_for_jump (rtx_insn *jump)
aarch64_switch_pstate_sm (AARCH64_ISA_MODE_SM_ON, AARCH64_ISA_MODE_SM_OFF);
if (guard_label)
emit_label (guard_label);
- auto seq = get_insns ();
- end_sequence ();
+ auto seq = end_sequence ();
emit_insn_before (seq, jump);
return true;
@@ -30897,8 +31162,7 @@ aarch64_switch_pstate_sm_for_call (rtx_call_insn *call)
args_switch.emit_epilogue ();
if (args_guard_label)
emit_label (args_guard_label);
- auto args_seq = get_insns ();
- end_sequence ();
+ auto args_seq = end_sequence ();
emit_insn_before (args_seq, call);
if (find_reg_note (call, REG_NORETURN, NULL_RTX))
@@ -30918,8 +31182,7 @@ aarch64_switch_pstate_sm_for_call (rtx_call_insn *call)
return_switch.emit_epilogue ();
if (return_guard_label)
emit_label (return_guard_label);
- auto result_seq = get_insns ();
- end_sequence ();
+ auto result_seq = end_sequence ();
emit_insn_after (result_seq, call);
return true;
}
@@ -31073,8 +31336,6 @@ aarch64_valid_sysreg_name_p (const char *regname)
const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname);
if (sysreg == NULL)
return aarch64_is_implem_def_reg (regname);
- if (sysreg->arch_reqs)
- return bool (aarch64_isa_flags & sysreg->arch_reqs);
return true;
}
@@ -31098,8 +31359,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op)
if ((write_p && (sysreg->properties & F_REG_READ))
|| (!write_p && (sysreg->properties & F_REG_WRITE)))
return NULL;
- if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0)
- return NULL;
return sysreg->encoding;
}
@@ -31298,6 +31557,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
}
}
+/* Expand the spaceship optab for floating-point operands.
+
+ If the result is compared against (-1, 0, 1, 2), expand into
+ fcmpe + conditional branch insns.
+
+ Otherwise (the result is just stored as an integer), expand into
+ fcmpe + a sequence of conditional select/increment/invert insns. */
+void
+aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
+{
+ rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1));
+
+ rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
+
+ if (hint == const0_rtx)
+ {
+ rtx un_label = gen_label_rtx ();
+ rtx lt_label = gen_label_rtx ();
+ rtx gt_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+
+ rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
+ gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
+ gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
+ gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ /* Equality. */
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (end_label);
+
+ emit_label (un_label);
+ emit_move_insn (dest, const2_rtx);
+ emit_jump (end_label);
+
+ emit_label (gt_label);
+ emit_move_insn (dest, const1_rtx);
+ emit_jump (end_label);
+
+ emit_label (lt_label);
+ emit_move_insn (dest, constm1_rtx);
+
+ emit_label (end_label);
+ }
+ else
+ {
+ rtx temp0 = gen_reg_rtx (SImode);
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
+
+ /* The value of hint is stored if the operands are unordered. */
+ rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
+ if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
+ temp_un = force_reg (SImode, temp_un);
+
+ emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, const0_rtx));
+ emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
+ temp_un, const0_rtx));
+ emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
+ gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
+ }
+}
+
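[Editor's aside: both expansion strategies implement the usual partial-ordering result of operator<=> on floating-point types; the branchy form above hard-wires 2 for the unordered case, while the conditional-select form stores the caller-supplied hint. A small, self-contained C++20 check of that -1 / 0 / 1 / unordered convention, independent of the RTL above and shown only to make the mapping concrete.]

  #include <cassert>
  #include <cmath>
  #include <compare>

  // Map a <=> b on doubles onto the integer convention used above.
  int spaceship_value (double a, double b, int unordered_hint)
  {
    std::partial_ordering r = a <=> b;
    if (r == std::partial_ordering::less)
      return -1;
    if (r == std::partial_ordering::equivalent)
      return 0;
    if (r == std::partial_ordering::greater)
      return 1;
    return unordered_hint;  // NaN operands fall through to here
  }

  int main ()
  {
    assert (spaceship_value (1.0, 2.0, 2) == -1);
    assert (spaceship_value (2.0, 2.0, 2) == 0);
    assert (spaceship_value (3.0, 2.0, 2) == 1);
    assert (spaceship_value (NAN, 2.0, 2) == 2);  // unordered
  }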
/* Target-specific selftests. */
#if CHECKING_P
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 031e621..e11e130 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -136,6 +136,14 @@
;; The actual value can sometimes vary, because it does not track
;; changes to PSTATE.ZA that happen during a lazy save and restore.
;; Those effects are instead tracked by ZA_SAVED_REGNUM.
+ ;;
+ ;; Sequences also write to this register if they synchronize the
+ ;; actual contents of ZA and PSTATE.ZA with the current function's
+ ;; ZA_REGNUM and SME_STATE_REGNUM. Conceptually, these extra writes
+ ;; do not change the value of SME_STATE_REGNUM. They simply act as
+ ;; sequencing points. They mean that all direct accesses to ZA can
+ ;; depend only on ZA_REGNUM and SME_STATE_REGNUM, rather than also
+ ;; depending on ZA_SAVED_REGNUM etc.
(SME_STATE_REGNUM 89)
;; Instructions write to this register if they set TPIDR2_EL0 to a
@@ -707,11 +715,12 @@
)
(define_expand "cbranch<mode>4"
- [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
- [(match_operand:GPF 1 "register_operand")
- (match_operand:GPF 2 "aarch64_fp_compare_operand")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:GPF_F16 1 "register_operand")
+ (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
""
"
operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
@@ -2181,9 +2190,9 @@
"aarch64_mem_pair_offset (operands[4], <MODE>mode)
&& known_eq (INTVAL (operands[5]),
INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))"
- {@ [cons: =&0, 1, =2, =3; attrs: type ]
- [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
- [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
+ {@ [cons: =0, 1, =2, =3; attrs: type ]
+ [ &rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
}
)
@@ -2237,9 +2246,9 @@
INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
&& !reg_overlap_mentioned_p (operands[0], operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[3])"
- {@ [cons: =&0, 1, 2, 3; attrs: type ]
- [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
- [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
+ {@ [cons: =0, 1, 2, 3; attrs: type ]
+ [ &rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
}
)
@@ -2485,15 +2494,15 @@
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_operand")))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
- [ rk , %rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2
- [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2
- [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
- [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2
- [ r , rk , Uaa ; multiple , * ] #
- [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
- [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]);
- [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]);
+ {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ]
+ [ rk , rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2
+ [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2
+ [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
+ [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2
+ [ r , rk , Uaa ; multiple , * ] #
+ [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
+ [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]);
+ [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]);
}
;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
)
@@ -2506,11 +2515,11 @@
(plus:SI (match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_pluslong_operand"))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ rk , %rk , I ; alu_imm ] add\t%w0, %w1, %2
- [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2
- [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2
- [ r , rk , Uaa ; multiple ] #
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ rk , rk , I ; alu_imm ] add\t%w0, %w1, %2
+ [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2
+ [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2
+ [ r , rk , Uaa ; multiple ] #
}
)
@@ -2579,14 +2588,14 @@
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))]
"TARGET_SVE && operands[0] != stack_pointer_rtx"
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2
- [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2
- [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2
- [ r , rk , Uaa ; multiple ] #
- [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
- [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]);
- [ &r , rk , Uat ; multiple ] #
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2
+ [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2
+ [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2
+ [ r , rk , Uaa ; multiple ] #
+ [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
+ [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]);
+ [ &r , rk , Uat ; multiple ] #
}
"&& epilogue_completed
&& !reg_overlap_mentioned_p (operands[0], operands[1])
@@ -2758,10 +2767,10 @@
(set (match_operand:GPI 0 "register_operand")
(plus:GPI (match_dup 1) (match_dup 2)))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2
- [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2
- [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2
+ [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2
+ [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2
}
)
@@ -2775,10 +2784,10 @@
(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , r ; alus_sreg ] adds\t%w0, %w1, %w2
- [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2
- [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , r ; alus_sreg ] adds\t%w0, %w1, %w2
+ [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2
+ [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2
}
)
@@ -2979,10 +2988,10 @@
(match_operand:GPI 1 "aarch64_plus_operand"))
(const_int 0)))]
""
- {@ [ cons: 0 , 1 ; attrs: type ]
- [ %r , r ; alus_sreg ] cmn\t%<w>0, %<w>1
- [ r , I ; alus_imm ] cmn\t%<w>0, %1
- [ r , J ; alus_imm ] cmp\t%<w>0, #%n1
+ {@ [ cons: %0 , 1 ; attrs: type ]
+ [ r , r ; alus_sreg ] cmn\t%<w>0, %<w>1
+ [ r , I ; alus_imm ] cmn\t%<w>0, %1
+ [ r , J ; alus_imm ] cmp\t%<w>0, #%n1
}
)
@@ -4337,26 +4346,28 @@
(define_insn "fcmp<mode>"
[(set (reg:CCFP CC_REGNUM)
- (compare:CCFP (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFP
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmp\t%<s>0, #0.0
[ w , w ] fcmp\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "fcmpe<mode>"
[(set (reg:CCFPE CC_REGNUM)
- (compare:CCFPE (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFPE
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmpe\t%<s>0, #0.0
[ w , w ] fcmpe\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
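[Editor's aside: widening the compare, branch and cstore patterns from GPF to GPF_F16 lets half-precision comparisons go straight through fcmp/fcmpe instead of being widened to single precision first. A minimal C++ snippet that exercises such a comparison, assuming a toolchain where _Float16 is available and a target with the FP16 instructions (e.g. -march=armv8.2-a+fp16); the function names are made up for illustration.]

  // With native FP16 compares, this can lower to a single fcmp on the
  // half-precision registers rather than converting both inputs first.
  bool less_f16 (_Float16 a, _Float16 b) { return a < b; }

  int main ()
  {
    _Float16 x = (_Float16) 1.0f;
    _Float16 y = (_Float16) 2.0f;
    return less_f16 (x, y) ? 0 : 1;
  }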
(define_insn "*cmp_swp_<shift>_reg<mode>"
@@ -4392,6 +4403,49 @@
[(set_attr "type" "alus_ext")]
)
+;; <=> operator pattern (integer)
+;; (a == b) ? 0 : (a < b) ? -1 : 1.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ // 1 indicates unsigned comparison, -1 indicates signed.
+ gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
+
+ rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
+ RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
+ RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
+
+ rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
+
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
+ const1_rtx, const0_rtx)));
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, temp)));
+ DONE;
+ }
+)
+
+;; <=> operator pattern (floating-point)
+;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")
+ (match_operand:GPF 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_FLOAT"
+ {
+ aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+ }
+)
+
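[Editor's aside: for the integer pattern the ordering is total, so operands[3] only selects signed versus unsigned comparison codes and the result is always -1, 0 or 1, realised roughly as one compare followed by two conditional selects. A small C++20 reference for that mapping, again just to pin down the convention rather than the generated code.]

  #include <cassert>
  #include <compare>
  #include <cstdint>

  // The -1 / 0 / 1 value the signed integer expansion is expected to produce.
  int spaceship_value (int64_t a, int64_t b)
  {
    std::strong_ordering r = a <=> b;
    return r < 0 ? -1 : r > 0 ? 1 : 0;
  }

  int main ()
  {
    assert (spaceship_value (-5, 7) == -1);
    assert (spaceship_value (7, 7) == 0);
    assert (spaceship_value (9, 7) == 1);
  }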
;; -------------------------------------------------------------------
;; Store-flag and conditional select insns
;; -------------------------------------------------------------------
@@ -4424,8 +4478,8 @@
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "register_operand")
(match_operator:SI 1 "aarch64_comparison_operator_mode"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
+ [(match_operand:GPF_F16 2 "register_operand")
+ (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))]
""
"
operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
@@ -4509,38 +4563,6 @@
[(set_attr "type" "csel")]
)
-(define_expand "cmov<mode>6"
- [(set (match_operand:GPI 0 "register_operand")
- (if_then_else:GPI
- (match_operator 1 "aarch64_comparison_operator"
- [(match_operand:GPI 2 "register_operand")
- (match_operand:GPI 3 "aarch64_plus_operand")])
- (match_operand:GPI 4 "register_operand")
- (match_operand:GPI 5 "register_operand")))]
- ""
- "
- operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
- operands[3]);
- operands[3] = const0_rtx;
- "
-)
-
-(define_expand "cmov<mode>6"
- [(set (match_operand:GPF 0 "register_operand")
- (if_then_else:GPF
- (match_operator 1 "aarch64_comparison_operator"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")])
- (match_operand:GPF 4 "register_operand")
- (match_operand:GPF 5 "register_operand")))]
- ""
- "
- operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
- operands[3]);
- operands[3] = const0_rtx;
- "
-)
-
(define_insn "*cmov<mode>_insn"
[(set (match_operand:ALLI 0 "register_operand")
(if_then_else:ALLI
@@ -5045,8 +5067,8 @@
(LOGICAL:GPI (match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_logical_operand")))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
- [ r , %r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ]
+ [ r , r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2
[ rk , r , <lconst> ; logic_imm , * ] <logical>\t%<w>0, %<w>1, %2
[ w , 0 , <lconst> ; * , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
[ w , w , w ; neon_logic , simd ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
@@ -5060,8 +5082,8 @@
(LOGICAL:SI (match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_logical_operand"))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logic_reg ] <logical>\t%w0, %w1, %w2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logic_reg ] <logical>\t%w0, %w1, %w2
[ rk , r , K ; logic_imm ] <logical>\t%w0, %w1, %2
}
)
@@ -5075,8 +5097,8 @@
(set (match_operand:GPI 0 "register_operand")
(and:GPI (match_dup 1) (match_dup 2)))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2
[ r , r , <lconst> ; logics_imm ] ands\t%<w>0, %<w>1, %2
}
)
@@ -5091,8 +5113,8 @@
(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logics_reg ] ands\t%w0, %w1, %w2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logics_reg ] ands\t%w0, %w1, %w2
[ r , r , K ; logics_imm ] ands\t%w0, %w1, %2
}
)
@@ -5676,9 +5698,9 @@
(match_operand:GPI 1 "aarch64_logical_operand"))
(const_int 0)))]
""
- {@ [ cons: 0 , 1 ; attrs: type ]
- [ %r , r ; logics_reg ] tst\t%<w>0, %<w>1
- [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1
+ {@ [ cons: %0 , 1 ; attrs: type ]
+ [ r , r ; logics_reg ] tst\t%<w>0, %<w>1
+ [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1
}
)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index d9e2401..507b6e7 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -118,6 +118,20 @@ __revl (unsigned long __value)
return __rev (__value);
}
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sqrt (double __x)
+{
+ return __builtin_aarch64_sqrtdf (__x);
+}
+
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sqrtf (float __x)
+{
+ return __builtin_aarch64_sqrtsf (__x);
+}
+
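[Editor's aside: since these wrappers simply forward to the square-root built-ins, they can be used wherever <arm_acle.h> is included on a toolchain that carries this change. A minimal usage sketch; the helper names are invented for the example.]

  #include <arm_acle.h>

  // Each call is expected to lower to a single scalar fsqrt instruction.
  double hyp (double x, double y)
  {
    return __sqrt (x * x + y * y);
  }

  float hypf (float x, float y)
  {
    return __sqrtf (x * x + y * y);
  }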
#pragma GCC push_options
#pragma GCC target ("+nothing+jscvt")
__extension__ extern __inline int32_t
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index e8321c4..e9f69f8 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -466,6 +466,13 @@
(and (match_code "const_vector")
(match_test "aarch64_simd_valid_orr_imm (op)")))
+(define_constraint "Df"
+ "@internal
+ A constraint that matches a vector of immediates for and which can be
+ optimized as fmov."
+ (and (match_code "const_vector")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))
+
(define_constraint "Db"
"@internal
A constraint that matches vector of immediates for and/bic."
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index 9d99554..0333746 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -63,7 +63,7 @@ struct aarch64_core_data
#define DEFAULT_CPU "generic-armv8-a"
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
- { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+ { CORE_NAME, #ARCH, IMP, PART, unsigned(VARIANT), feature_deps::cpu_##CORE_IDENT },
static CONSTEXPR const aarch64_core_data aarch64_cpu_data[] =
{
diff --git a/gcc/config/aarch64/gcc-auto-profile b/gcc/config/aarch64/gcc-auto-profile
new file mode 100755
index 0000000..4d5c2e3
--- /dev/null
+++ b/gcc/config/aarch64/gcc-auto-profile
@@ -0,0 +1,53 @@
+#!/bin/sh
+# Profile workload for gcc profile feedback (autofdo) using Linux perf.
+# Copyright The GNU Toolchain Authors.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Run perf record with branch stack sampling and check for a
+# specific error message to see if it is supported.
+use_brbe=true
+output=$(perf record -j any,u /bin/true 2>&1)
+case "$output" in
+ *"PMU Hardware or event type doesn't support branch stack sampling."*)
+ use_brbe=false;;
+ *)
+ use_brbe=true;;
+esac
+
+FLAGS=u
+if [ "$1" = "--kernel" ] ; then
+ FLAGS=k
+ shift
+elif [ "$1" = "--all" ] ; then
+ FLAGS=u,k
+ shift
+fi
+
+if [ "$use_brbe" = true ] ; then
+ if grep -q hypervisor /proc/cpuinfo ; then
+ echo >&2 "Warning: branch profiling may not be functional in VMs"
+ fi
+ set -x
+ perf record -j any,$FLAGS "$@"
+ set +x
+else
+ echo >&2 "Warning: branch profiling may not be functional without BRBE"
+ set -x
+ perf record "$@"
+ set +x
+fi
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 146453b..a8957681 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -479,13 +479,25 @@
;; All fully-packed SVE integer and Advanced SIMD integer modes.
(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
-;; All fully-packed SVE floating-point vector modes.
+;; Fully-packed SVE floating-point vector modes, excluding BF16.
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
+;; Partial SVE floating-point vector modes, excluding BF16.
+(define_mode_iterator SVE_PARTIAL_F [VNx2HF VNx4HF VNx2SF])
+
+;; SVE floating-point vector modes, excluding BF16.
+(define_mode_iterator SVE_F [SVE_PARTIAL_F SVE_FULL_F])
+
;; Fully-packed SVE floating-point vector modes and their scalar equivalents.
(define_mode_iterator SVE_FULL_F_SCALAR [SVE_FULL_F GPF_HF])
-(define_mode_iterator SVE_FULL_F_BF [(VNx8BF "TARGET_SSVE_B16B16") SVE_FULL_F])
+(define_mode_iterator SVE_FULL_F_B16B16 [(VNx8BF "TARGET_SSVE_B16B16") SVE_FULL_F])
+
+(define_mode_iterator SVE_PARTIAL_F_B16B16 [(VNx2BF "TARGET_SSVE_B16B16")
+ (VNx4BF "TARGET_SSVE_B16B16")
+ SVE_PARTIAL_F])
+
+(define_mode_iterator SVE_F_B16B16 [SVE_PARTIAL_F_B16B16 SVE_FULL_F_B16B16])
;; Modes for which (B)FCLAMP is supported.
(define_mode_iterator SVE_CLAMP_F [(VNx8BF "TARGET_SSVE_B16B16")
@@ -529,6 +541,13 @@
;; elements.
(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF])
+;; Partial SVE floating-point vector modes that have 16-bit or 32-bit
+;; elements.
+(define_mode_iterator SVE_PARTIAL_HSF [VNx2HF VNx4HF VNx2SF])
+
+;; SVE floating-point vector modes that have 16-bit or 32-bit elements.
+(define_mode_iterator SVE_HSF [SVE_PARTIAL_HSF SVE_FULL_HSF])
+
;; Fully-packed SVE integer vector modes that have 16-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_HDI [VNx8HI VNx2DI])
@@ -553,6 +572,9 @@
(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM")
(VNx2DF "TARGET_SVE_F64MM")])
+;; SVE floating-point vector modes that have 32-bit or 64-bit elements.
+(define_mode_iterator SVE_SDF [VNx2SF SVE_FULL_SDF])
+
;; Fully-packed SVE vector modes that have 32-bit or smaller elements.
(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
VNx8BF VNx8HF VNx4SF])
@@ -580,14 +602,13 @@
VNx4SI VNx2SI
VNx2DI])
+(define_mode_iterator SVE_BF [VNx2BF VNx4BF VNx8BF])
+
;; All SVE floating-point vector modes.
-(define_mode_iterator SVE_F [VNx8HF VNx4HF VNx2HF
- VNx8BF VNx4BF VNx2BF
- VNx4SF VNx2SF
- VNx2DF])
+(define_mode_iterator SVE_F_BF [SVE_F SVE_BF])
;; All SVE vector modes.
-(define_mode_iterator SVE_ALL [SVE_I SVE_F])
+(define_mode_iterator SVE_ALL [SVE_I SVE_F_BF])
;; All SVE 2-vector modes.
(define_mode_iterator SVE_FULLx2 [VNx32QI VNx16HI VNx8SI VNx4DI
@@ -623,6 +644,9 @@
VNx4SI VNx2SI
VNx2DI])
+;; SVE integer vector modes with 32-bit elements.
+(define_mode_iterator SVE_SI [VNx2SI VNx4SI])
+
(define_mode_iterator SVE_DIx24 [VNx4DI VNx8DI])
;; SVE modes with 2 or 4 elements.
@@ -638,6 +662,9 @@
(define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2BF
VNx2SI VNx2SF VNx2DI VNx2DF])
+;; SVE SI and DI modes with 2 elements.
+(define_mode_iterator SVE_2SDI [VNx2SI VNx2DI])
+
;; SVE integer modes with 2 elements, excluding the widest element.
(define_mode_iterator SVE_2BHSI [VNx2QI VNx2HI VNx2SI])
@@ -1678,6 +1705,30 @@
(SI "SI") (HI "HI")
(QI "QI")
(V4BF "BF") (V8BF "BF")
+ (V2x8QI "QI") (V2x4HI "HI")
+ (V2x2SI "SI") (V2x1DI "DI")
+ (V2x4HF "HF") (V2x2SF "SF")
+ (V2x1DF "DF") (V2x4BF "BF")
+ (V3x8QI "QI") (V3x4HI "HI")
+ (V3x2SI "SI") (V3x1DI "DI")
+ (V3x4HF "HF") (V3x2SF "SF")
+ (V3x1DF "DF") (V3x4BF "BF")
+ (V4x8QI "QI") (V4x4HI "HI")
+ (V4x2SI "SI") (V4x1DI "DI")
+ (V4x4HF "HF") (V4x2SF "SF")
+ (V4x1DF "DF") (V4x4BF "BF")
+ (V2x16QI "QI") (V2x8HI "HI")
+ (V2x4SI "SI") (V2x2DI "DI")
+ (V2x8HF "HF") (V2x4SF "SF")
+ (V2x2DF "DF") (V2x8BF "BF")
+ (V3x16QI "QI") (V3x8HI "HI")
+ (V3x4SI "SI") (V3x2DI "DI")
+ (V3x8HF "HF") (V3x4SF "SF")
+ (V3x2DF "DF") (V3x8BF "BF")
+ (V4x16QI "QI") (V4x8HI "HI")
+ (V4x4SI "SI") (V4x2DI "DI")
+ (V4x8HF "HF") (V4x4SF "SF")
+ (V4x2DF "DF") (V4x8BF "BF")
(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
(VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
@@ -1699,6 +1750,30 @@
(DF "df") (SI "si")
(HI "hi") (QI "qi")
(V4BF "bf") (V8BF "bf")
+ (V2x8QI "qi") (V2x4HI "hi")
+ (V2x2SI "si") (V2x1DI "di")
+ (V2x4HF "hf") (V2x2SF "sf")
+ (V2x1DF "df") (V2x4BF "bf")
+ (V3x8QI "qi") (V3x4HI "hi")
+ (V3x2SI "si") (V3x1DI "di")
+ (V3x4HF "hf") (V3x2SF "sf")
+ (V3x1DF "df") (V3x4BF "bf")
+ (V4x8QI "qi") (V4x4HI "hi")
+ (V4x2SI "si") (V4x1DI "di")
+ (V4x4HF "hf") (V4x2SF "sf")
+ (V4x1DF "df") (V4x4BF "bf")
+ (V2x16QI "qi") (V2x8HI "hi")
+ (V2x4SI "si") (V2x2DI "di")
+ (V2x8HF "hf") (V2x4SF "sf")
+ (V2x2DF "df") (V2x8BF "bf")
+ (V3x16QI "qi") (V3x8HI "hi")
+ (V3x4SI "si") (V3x2DI "di")
+ (V3x8HF "hf") (V3x4SF "sf")
+ (V3x2DF "df") (V3x8BF "bf")
+ (V4x16QI "qi") (V4x8HI "hi")
+ (V4x4SI "si") (V4x2DI "di")
+ (V4x8HF "hf") (V4x4SF "sf")
+ (V4x2DF "df") (V4x8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
(VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
@@ -2445,7 +2520,9 @@
(VNx8DI "vnx2di") (VNx8DF "vnx2df")])
;; The predicate mode associated with an SVE data mode. For structure modes
-;; this is equivalent to the <VPRED> of the subvector mode.
+;; this is equivalent to the <VPRED> of the subvector mode. For partial
+;; vector modes, this is equivalent to the <VPRED> of a full SVE mode with
+;; the same number of elements.
(define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
(VNx4QI "VNx4BI") (VNx2QI "VNx2BI")
(VNx8HI "VNx8BI") (VNx4HI "VNx4BI") (VNx2HI "VNx2BI")
@@ -2583,19 +2660,22 @@
(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
(VNx4BI "4") (VNx2BI "8")])
-;; Two-nybble mask for partial vector modes: nunits, byte size.
-(define_mode_attr self_mask [(VNx8QI "0x81")
- (VNx4QI "0x41")
- (VNx2QI "0x21")
- (VNx4HI "0x42")
- (VNx2HI "0x22")
- (VNx2SI "0x24")])
-
-;; For SVE_HSDI vector modes, the mask of narrower modes, encoded as above.
-(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
- (VNx2HI "0x21")
- (VNx4SI "0x43") (VNx2SI "0x23")
- (VNx2DI "0x27")])
+;; Two-nybble mask for vector modes: nunits, byte size.
+(define_mode_attr self_mask [(VNx2HI "0x22") (VNx2HF "0x22")
+ (VNx4HI "0x42") (VNx4HF "0x42")
+ (VNx8HI "0x82") (VNx8HF "0x82")
+ (VNx2SI "0x24") (VNx2SF "0x24")
+ (VNx4SI "0x44") (VNx4SF "0x44")
+ (VNx2DI "0x28") (VNx2DF "0x28")
+ (VNx8QI "0x81") (VNx4QI "0x41") (VNx2QI "0x21")])
+
+;; The mask of narrower vector modes, encoded as above.
+(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx8HF "0x81")
+ (VNx4HI "0x41") (VNx4HF "0x41")
+ (VNx2HI "0x21") (VNx2HF "0x21")
+ (VNx4SI "0x43") (VNx4SF "0x43")
+ (VNx2SI "0x23") (VNx2SF "0x23")
+ (VNx2DI "0x27") (VNx2DF "0x27")])
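[Editor's aside: the two-nybble encoding is simply (number of elements << 4) | (element size in bytes), so VNx4SI, four 32-bit elements, gets 0x44 and VNx2DF, two 8-byte elements, gets 0x28. A tiny C++ check of that encoding; the mode list is a hand-copied subset, purely illustrative.]

  #include <cassert>

  // (nunits << 4) | element size in bytes, matching the tables above.
  constexpr unsigned self_mask (unsigned nunits, unsigned elt_bytes)
  {
    return (nunits << 4) | elt_bytes;
  }

  int main ()
  {
    assert (self_mask (2, 2) == 0x22);  // VNx2HI / VNx2HF
    assert (self_mask (4, 4) == 0x44);  // VNx4SI / VNx4SF
    assert (self_mask (2, 8) == 0x28);  // VNx2DI / VNx2DF
    assert (self_mask (8, 1) == 0x81);  // VNx8QI
  }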
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
@@ -2611,13 +2691,15 @@
(V2DI "vec") (DI "offset")])
(define_mode_attr b [(V4BF "b") (V4HF "") (V8BF "b") (V8HF "")
- (VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")
+ (VNx2BF "b") (VNx2HF "") (VNx2SF "")
+ (VNx4BF "b") (VNx4HF "") (VNx4SF "")
+ (VNx8BF "b") (VNx8HF "") (VNx2DF "")
(VNx16BF "b") (VNx16HF "") (VNx8SF "") (VNx4DF "")
(VNx32BF "b") (VNx32HF "") (VNx16SF "") (VNx8DF "")])
-(define_mode_attr is_bf16 [(VNx8BF "true")
- (VNx8HF "false")
- (VNx4SF "false")
+(define_mode_attr is_bf16 [(VNx2BF "true") (VNx4BF "true") (VNx8BF "true")
+ (VNx2HF "false") (VNx4HF "false") (VNx8HF "false")
+ (VNx2SF "false") (VNx4SF "false")
(VNx2DF "false")])
(define_mode_attr aligned_operand [(VNx16QI "register_operand")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1ab1c69..d8e9725 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -123,7 +123,8 @@
(define_predicate "aarch64_reg_or_and_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_and_imm (op)"))))
+ (ior (match_test "aarch64_simd_valid_and_imm (op)")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))))
(define_predicate "aarch64_reg_or_xor_imm"
(ior (match_operand 0 "register_operand")
@@ -586,6 +587,11 @@
return aarch64_simd_shift_imm_p (op, mode, false);
})
+(define_special_predicate "aarch64_predicate_operand"
+ (and (match_code "reg,subreg")
+ (match_test "register_operand (op, GET_MODE (op))")
+ (match_test "aarch64_sve_valid_pred_p (op, mode)")))
+
(define_predicate "aarch64_simd_imm_zero"
(and (match_code "const,const_vector")
(match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index 5957194..38a8c06 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -140,6 +140,17 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.cc $(CONFIG_H) $(SYSTEM_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-common.cc
+aarch64-elf-metadata.o: $(srcdir)/config/aarch64/aarch64-elf-metadata.cc \
+ $(CONFIG_H) \
+ $(BACKEND_H) \
+ $(RTL_H) \
+ $(SYSTEM_H) \
+ $(TARGET_H) \
+ $(srcdir)/config/aarch64/aarch64-elf-metadata.h \
+ output.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/aarch64/aarch64-elf-metadata.cc
+
aarch64-c.o: $(srcdir)/config/aarch64/aarch64-c.cc $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index ba470d9..5082c1c 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -1036,8 +1036,7 @@ alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_libcall_block (insn, dest, r0, x);
return dest;
@@ -1059,8 +1058,7 @@ alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLSLDM_CALL);
@@ -3214,8 +3212,7 @@ alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
CALL_INSN_FUNCTION_USAGE (tmp) = usage;
RTL_CONST_CALL_P (tmp) = 1;
- tmp = get_insns ();
- end_sequence ();
+ tmp = end_sequence ();
emit_libcall_block (tmp, target, reg, equiv);
}
@@ -4291,14 +4288,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
tree mem = MEM_EXPR (expr);
if (mem != NULL_TREE)
- switch (TREE_CODE (mem))
- {
- case MEM_REF:
- tree_offset = mem_ref_offset (mem).force_shwi ();
- tree_align = get_object_alignment (get_base_address (mem));
- break;
+ {
+ HOST_WIDE_INT comp_offset = 0;
- case COMPONENT_REF:
+ for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
{
tree byte_offset = component_ref_field_offset (mem);
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
@@ -4307,14 +4300,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
|| !poly_int_tree_p (byte_offset, &offset)
|| !tree_fits_shwi_p (bit_offset))
break;
- tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+ comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
}
- tree_align = get_object_alignment (get_base_address (mem));
- break;
- default:
- break;
- }
+ if (TREE_CODE (mem) == MEM_REF)
+ {
+ tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+ tree_align = get_object_alignment (get_base_address (mem));
+ }
+ }
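[Editor's aside: the rewritten block walks an arbitrarily deep chain of COMPONENT_REFs, accumulating each field's byte offset, and only then reads the MEM_REF offset and base alignment; the old switch handled a single level. A hedged C++ analogue of that arithmetic, with types and field names invented for the illustration.]

  #include <cstddef>

  // For an access like o.mid.in.x, the loop adds up the field offsets of
  // each COMPONENT_REF level before the MEM_REF offset of the base is read.
  struct Inner { char pad[2]; short x; };
  struct Mid   { char pad[4]; Inner in; };
  struct Outer { char pad[8]; Mid mid; };

  int main ()
  {
    constexpr std::size_t walked =
      offsetof (Outer, mid) + offsetof (Mid, in) + offsetof (Inner, x);
    static_assert (walked == 14, "8 + 4 + 2 bytes accumulated over the chain");
    return 0;
  }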
if (reg_align > mem_align)
{
@@ -5599,8 +5593,7 @@ alpha_gp_save_rtx (void)
m = validize_mem (m);
emit_move_insn (m, pic_offset_table_rtx);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
/* We used to simply emit the sequence after entry_of_function.
However this breaks the CFG if the first instruction in the
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index be4bd61..78ba814 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -720,8 +720,6 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \
arc_no_speculation_in_delay_slots_p
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_true
#define TARGET_REGISTER_PRIORITY arc_register_priority
/* Stores with scaled offsets have different displacement ranges. */
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
@@ -8220,8 +8218,7 @@ hwloop_optimize (hwloop_info loop)
insn = emit_insn (gen_arc_lp (loop->start_label,
loop->end_label));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_after = BB_END (entry_bb);
if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc
index 3289853..9cd926e 100644
--- a/gcc/config/arm/aarch-common.cc
+++ b/gcc/config/arm/aarch-common.cc
@@ -655,8 +655,7 @@ arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
emit_move_insn (dest, tmp);
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
return saw_asm_flag ? seq : NULL;
}
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index cdce361..a9b0dfa 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -69,11 +69,6 @@
"d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \
"d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \
"d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \
- "wr0", "wr1", "wr2", "wr3", \
- "wr4", "wr5", "wr6", "wr7", \
- "wr8", "wr9", "wr10", "wr11", \
- "wr12", "wr13", "wr14", "wr15", \
- "wcgr0", "wcgr1", "wcgr2", "wcgr3", \
"cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0", \
"ra_auth_code" \
}
diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c56ab5d..3bb2566 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -816,252 +816,6 @@ static arm_builtin_cde_datum cde_builtin_data[] =
enum arm_builtins
{
- ARM_BUILTIN_GETWCGR0,
- ARM_BUILTIN_GETWCGR1,
- ARM_BUILTIN_GETWCGR2,
- ARM_BUILTIN_GETWCGR3,
-
- ARM_BUILTIN_SETWCGR0,
- ARM_BUILTIN_SETWCGR1,
- ARM_BUILTIN_SETWCGR2,
- ARM_BUILTIN_SETWCGR3,
-
- ARM_BUILTIN_WZERO,
-
- ARM_BUILTIN_WAVG2BR,
- ARM_BUILTIN_WAVG2HR,
- ARM_BUILTIN_WAVG2B,
- ARM_BUILTIN_WAVG2H,
-
- ARM_BUILTIN_WACCB,
- ARM_BUILTIN_WACCH,
- ARM_BUILTIN_WACCW,
-
- ARM_BUILTIN_WMACS,
- ARM_BUILTIN_WMACSZ,
- ARM_BUILTIN_WMACU,
- ARM_BUILTIN_WMACUZ,
-
- ARM_BUILTIN_WSADB,
- ARM_BUILTIN_WSADBZ,
- ARM_BUILTIN_WSADH,
- ARM_BUILTIN_WSADHZ,
-
- ARM_BUILTIN_WALIGNI,
- ARM_BUILTIN_WALIGNR0,
- ARM_BUILTIN_WALIGNR1,
- ARM_BUILTIN_WALIGNR2,
- ARM_BUILTIN_WALIGNR3,
-
- ARM_BUILTIN_TMIA,
- ARM_BUILTIN_TMIAPH,
- ARM_BUILTIN_TMIABB,
- ARM_BUILTIN_TMIABT,
- ARM_BUILTIN_TMIATB,
- ARM_BUILTIN_TMIATT,
-
- ARM_BUILTIN_TMOVMSKB,
- ARM_BUILTIN_TMOVMSKH,
- ARM_BUILTIN_TMOVMSKW,
-
- ARM_BUILTIN_TBCSTB,
- ARM_BUILTIN_TBCSTH,
- ARM_BUILTIN_TBCSTW,
-
- ARM_BUILTIN_WMADDS,
- ARM_BUILTIN_WMADDU,
-
- ARM_BUILTIN_WPACKHSS,
- ARM_BUILTIN_WPACKWSS,
- ARM_BUILTIN_WPACKDSS,
- ARM_BUILTIN_WPACKHUS,
- ARM_BUILTIN_WPACKWUS,
- ARM_BUILTIN_WPACKDUS,
-
- ARM_BUILTIN_WADDB,
- ARM_BUILTIN_WADDH,
- ARM_BUILTIN_WADDW,
- ARM_BUILTIN_WADDSSB,
- ARM_BUILTIN_WADDSSH,
- ARM_BUILTIN_WADDSSW,
- ARM_BUILTIN_WADDUSB,
- ARM_BUILTIN_WADDUSH,
- ARM_BUILTIN_WADDUSW,
- ARM_BUILTIN_WSUBB,
- ARM_BUILTIN_WSUBH,
- ARM_BUILTIN_WSUBW,
- ARM_BUILTIN_WSUBSSB,
- ARM_BUILTIN_WSUBSSH,
- ARM_BUILTIN_WSUBSSW,
- ARM_BUILTIN_WSUBUSB,
- ARM_BUILTIN_WSUBUSH,
- ARM_BUILTIN_WSUBUSW,
-
- ARM_BUILTIN_WAND,
- ARM_BUILTIN_WANDN,
- ARM_BUILTIN_WOR,
- ARM_BUILTIN_WXOR,
-
- ARM_BUILTIN_WCMPEQB,
- ARM_BUILTIN_WCMPEQH,
- ARM_BUILTIN_WCMPEQW,
- ARM_BUILTIN_WCMPGTUB,
- ARM_BUILTIN_WCMPGTUH,
- ARM_BUILTIN_WCMPGTUW,
- ARM_BUILTIN_WCMPGTSB,
- ARM_BUILTIN_WCMPGTSH,
- ARM_BUILTIN_WCMPGTSW,
-
- ARM_BUILTIN_TEXTRMSB,
- ARM_BUILTIN_TEXTRMSH,
- ARM_BUILTIN_TEXTRMSW,
- ARM_BUILTIN_TEXTRMUB,
- ARM_BUILTIN_TEXTRMUH,
- ARM_BUILTIN_TEXTRMUW,
- ARM_BUILTIN_TINSRB,
- ARM_BUILTIN_TINSRH,
- ARM_BUILTIN_TINSRW,
-
- ARM_BUILTIN_WMAXSW,
- ARM_BUILTIN_WMAXSH,
- ARM_BUILTIN_WMAXSB,
- ARM_BUILTIN_WMAXUW,
- ARM_BUILTIN_WMAXUH,
- ARM_BUILTIN_WMAXUB,
- ARM_BUILTIN_WMINSW,
- ARM_BUILTIN_WMINSH,
- ARM_BUILTIN_WMINSB,
- ARM_BUILTIN_WMINUW,
- ARM_BUILTIN_WMINUH,
- ARM_BUILTIN_WMINUB,
-
- ARM_BUILTIN_WMULUM,
- ARM_BUILTIN_WMULSM,
- ARM_BUILTIN_WMULUL,
-
- ARM_BUILTIN_PSADBH,
- ARM_BUILTIN_WSHUFH,
-
- ARM_BUILTIN_WSLLH,
- ARM_BUILTIN_WSLLW,
- ARM_BUILTIN_WSLLD,
- ARM_BUILTIN_WSRAH,
- ARM_BUILTIN_WSRAW,
- ARM_BUILTIN_WSRAD,
- ARM_BUILTIN_WSRLH,
- ARM_BUILTIN_WSRLW,
- ARM_BUILTIN_WSRLD,
- ARM_BUILTIN_WRORH,
- ARM_BUILTIN_WRORW,
- ARM_BUILTIN_WRORD,
- ARM_BUILTIN_WSLLHI,
- ARM_BUILTIN_WSLLWI,
- ARM_BUILTIN_WSLLDI,
- ARM_BUILTIN_WSRAHI,
- ARM_BUILTIN_WSRAWI,
- ARM_BUILTIN_WSRADI,
- ARM_BUILTIN_WSRLHI,
- ARM_BUILTIN_WSRLWI,
- ARM_BUILTIN_WSRLDI,
- ARM_BUILTIN_WRORHI,
- ARM_BUILTIN_WRORWI,
- ARM_BUILTIN_WRORDI,
-
- ARM_BUILTIN_WUNPCKIHB,
- ARM_BUILTIN_WUNPCKIHH,
- ARM_BUILTIN_WUNPCKIHW,
- ARM_BUILTIN_WUNPCKILB,
- ARM_BUILTIN_WUNPCKILH,
- ARM_BUILTIN_WUNPCKILW,
-
- ARM_BUILTIN_WUNPCKEHSB,
- ARM_BUILTIN_WUNPCKEHSH,
- ARM_BUILTIN_WUNPCKEHSW,
- ARM_BUILTIN_WUNPCKEHUB,
- ARM_BUILTIN_WUNPCKEHUH,
- ARM_BUILTIN_WUNPCKEHUW,
- ARM_BUILTIN_WUNPCKELSB,
- ARM_BUILTIN_WUNPCKELSH,
- ARM_BUILTIN_WUNPCKELSW,
- ARM_BUILTIN_WUNPCKELUB,
- ARM_BUILTIN_WUNPCKELUH,
- ARM_BUILTIN_WUNPCKELUW,
-
- ARM_BUILTIN_WABSB,
- ARM_BUILTIN_WABSH,
- ARM_BUILTIN_WABSW,
-
- ARM_BUILTIN_WADDSUBHX,
- ARM_BUILTIN_WSUBADDHX,
-
- ARM_BUILTIN_WABSDIFFB,
- ARM_BUILTIN_WABSDIFFH,
- ARM_BUILTIN_WABSDIFFW,
-
- ARM_BUILTIN_WADDCH,
- ARM_BUILTIN_WADDCW,
-
- ARM_BUILTIN_WAVG4,
- ARM_BUILTIN_WAVG4R,
-
- ARM_BUILTIN_WMADDSX,
- ARM_BUILTIN_WMADDUX,
-
- ARM_BUILTIN_WMADDSN,
- ARM_BUILTIN_WMADDUN,
-
- ARM_BUILTIN_WMULWSM,
- ARM_BUILTIN_WMULWUM,
-
- ARM_BUILTIN_WMULWSMR,
- ARM_BUILTIN_WMULWUMR,
-
- ARM_BUILTIN_WMULWL,
-
- ARM_BUILTIN_WMULSMR,
- ARM_BUILTIN_WMULUMR,
-
- ARM_BUILTIN_WQMULM,
- ARM_BUILTIN_WQMULMR,
-
- ARM_BUILTIN_WQMULWM,
- ARM_BUILTIN_WQMULWMR,
-
- ARM_BUILTIN_WADDBHUSM,
- ARM_BUILTIN_WADDBHUSL,
-
- ARM_BUILTIN_WQMIABB,
- ARM_BUILTIN_WQMIABT,
- ARM_BUILTIN_WQMIATB,
- ARM_BUILTIN_WQMIATT,
-
- ARM_BUILTIN_WQMIABBN,
- ARM_BUILTIN_WQMIABTN,
- ARM_BUILTIN_WQMIATBN,
- ARM_BUILTIN_WQMIATTN,
-
- ARM_BUILTIN_WMIABB,
- ARM_BUILTIN_WMIABT,
- ARM_BUILTIN_WMIATB,
- ARM_BUILTIN_WMIATT,
-
- ARM_BUILTIN_WMIABBN,
- ARM_BUILTIN_WMIABTN,
- ARM_BUILTIN_WMIATBN,
- ARM_BUILTIN_WMIATTN,
-
- ARM_BUILTIN_WMIAWBB,
- ARM_BUILTIN_WMIAWBT,
- ARM_BUILTIN_WMIAWTB,
- ARM_BUILTIN_WMIAWTT,
-
- ARM_BUILTIN_WMIAWBBN,
- ARM_BUILTIN_WMIAWBTN,
- ARM_BUILTIN_WMIAWTBN,
- ARM_BUILTIN_WMIAWTTN,
-
- ARM_BUILTIN_WMERGE,
-
ARM_BUILTIN_GET_FPSCR,
ARM_BUILTIN_SET_FPSCR,
ARM_BUILTIN_GET_FPSCR_NZCVQC,
@@ -1878,115 +1632,6 @@ struct builtin_description
static const struct builtin_description bdesc_2arg[] =
{
-#define IWMMXT_BUILTIN(code, string, builtin) \
- { isa_bit_iwmmxt, CODE_FOR_##code, \
- "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN(code, string, builtin) \
- { isa_bit_iwmmxt2, CODE_FOR_##code, \
- "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
- IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
- IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
- IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
- IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
- IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
- IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
- IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
- IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
- IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
- IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
- IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
- IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
- IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
- IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
- IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
- IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
- IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
- IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
- IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
- IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
- IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
- IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
- IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
- IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
- IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
- IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
- IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
- IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
- IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
- IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
- IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
- IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
- IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
- IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
- IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
- IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
- IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
- IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
- IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
- IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
- IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
- IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
- IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
- IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
- IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
- IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
- IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
- IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
- IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
- IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
- IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
- IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
- IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
- IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
- IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
- IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
- IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
- IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
- IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
- IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
- IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
- IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
-
-#define IWMMXT_BUILTIN2(code, builtin) \
- { isa_bit_iwmmxt, CODE_FOR_##code, NULL, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN2(code, builtin) \
- { isa_bit_iwmmxt2, CODE_FOR_##code, NULL, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
-
-
#define FP_BUILTIN(L, U) \
{isa_nobit, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
UNKNOWN, 0},
@@ -2013,31 +1658,6 @@ static const struct builtin_description bdesc_2arg[] =
static const struct builtin_description bdesc_1arg[] =
{
- IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
- IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
- IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
- IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
- IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
- IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
- IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
- IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
- IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
- IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
- IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
- IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
-
#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
#define CRYPTO2(L, U, R, A1, A2)
#define CRYPTO3(L, U, R, A1, A2, A3)
@@ -2059,387 +1679,6 @@ static const struct builtin_description bdesc_3arg[] =
};
#undef CRYPTO_BUILTIN
-/* Set up all the iWMMXt builtins. This is not called if
- TARGET_IWMMXT is zero. */
-
-static void
-arm_init_iwmmxt_builtins (void)
-{
- const struct builtin_description * d;
- size_t i;
-
- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
-
- tree v8qi_ftype_v8qi_v8qi_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_di_di
- = build_function_type_list (V2SI_type_node,
- long_long_integer_type_node,
- long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_di_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node, NULL_TREE);
- tree di_ftype_di_int_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node,
- integer_type_node, NULL_TREE);
- tree int_ftype_v8qi
- = build_function_type_list (integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree int_ftype_v4hi
- = build_function_type_list (integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree int_ftype_v2si
- = build_function_type_list (integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree int_ftype_v8qi_int
- = build_function_type_list (integer_type_node,
- V8QI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v4hi_int
- = build_function_type_list (integer_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v2si_int
- = build_function_type_list (integer_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi_int_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- /* Miscellaneous. */
- tree v8qi_ftype_v4hi_v4hi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v2si_v2si
- = build_function_type_list (V4HI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree v8qi_ftype_v4hi_v8qi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V8QI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_void
- = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
- tree int_ftype_void
- = build_function_type_list (integer_type_node, NULL_TREE);
- tree di_ftype_v8qi
- = build_function_type_list (long_long_integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree di_ftype_v4hi
- = build_function_type_list (long_long_integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree di_ftype_v2si
- = build_function_type_list (long_long_integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v8qi
- = build_function_type_list (V4HI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, NULL_TREE);
-
- tree di_ftype_di_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V4HI_type_node, V4HI_type_node,
- NULL_TREE);
-
- tree di_ftype_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- V4HI_type_node,V4HI_type_node,
- NULL_TREE);
-
- tree v2si_ftype_v2si_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V4HI_type_node,
- V4HI_type_node, NULL_TREE);
-
- tree v2si_ftype_v2si_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V8QI_type_node,
- V8QI_type_node, NULL_TREE);
-
- tree di_ftype_di_v2si_v2si
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V2SI_type_node, V2SI_type_node,
- NULL_TREE);
-
- tree di_ftype_di_di_int
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- integer_type_node, NULL_TREE);
-
- tree void_ftype_int
- = build_function_type_list (void_type_node,
- integer_type_node, NULL_TREE);
-
- tree v8qi_ftype_char
- = build_function_type_list (V8QI_type_node,
- signed_char_type_node, NULL_TREE);
-
- tree v4hi_ftype_short
- = build_function_type_list (V4HI_type_node,
- short_integer_type_node, NULL_TREE);
-
- tree v2si_ftype_int
- = build_function_type_list (V2SI_type_node,
- integer_type_node, NULL_TREE);
-
- /* Normal vector binops. */
- tree v8qi_ftype_v8qi_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node,V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
-
- /* Add all builtins that are more or less simple operations on two
- operands. */
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- {
- /* Use one of the operands; the target can have a different mode for
- mask-generating compares. */
- machine_mode mode;
- tree type;
-
- if (d->name == 0
- || !(d->feature == isa_bit_iwmmxt
- || d->feature == isa_bit_iwmmxt2))
- continue;
-
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case E_V8QImode:
- type = v8qi_ftype_v8qi_v8qi;
- break;
- case E_V4HImode:
- type = v4hi_ftype_v4hi_v4hi;
- break;
- case E_V2SImode:
- type = v2si_ftype_v2si_v2si;
- break;
- case E_DImode:
- type = di_ftype_di_di;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- def_mbuiltin (d->feature, d->name, type, d->code);
- }
-
- /* Add the remaining MMX insns with somewhat more complicated types. */
-#define iwmmx_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (isa_bit_iwmmxt, "__builtin_arm_" NAME, \
- (TYPE), ARM_BUILTIN_ ## CODE)
-
-#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (isa_bit_iwmmxt2, "__builtin_arm_" NAME, \
- (TYPE), ARM_BUILTIN_ ## CODE)
-
- iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
- iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
- iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
- iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
- iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
- iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
- iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
- iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
- iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
-
- iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
- iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
- iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
- iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
- iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
- iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
-
- iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
- iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
- iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
- iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
- iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
- iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
-
- iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
- iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
- iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
- iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
- iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
- iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
-
- iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
- iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
- iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
- iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
- iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
- iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
-
- iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
-
- iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
- iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
- iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
- iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
- iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
- iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
- iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
- iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
- iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
- iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
-
- iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
- iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
- iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
- iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
- iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
- iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
- iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
- iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
- iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
-
- iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
- iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
- iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
-
- iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
- iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
- iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
-
- iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
- iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
-
- iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
- iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
- iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
- iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
- iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
- iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
-
- iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
- iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
- iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
- iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
- iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
- iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
- iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
- iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
- iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
- iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
- iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
- iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
-
- iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
- iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
- iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
- iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
-
- iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
- iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
- iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
- iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
- iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
- iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
- iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
-
- iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
- iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
- iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
-
- iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
- iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
- iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
- iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
-
- iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
- iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
- iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
- iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
-
- iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
- iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
- iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
- iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
-
- iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
- iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
- iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
- iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
-
- iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
- iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
- iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
- iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
-
- iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
- iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
- iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
- iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
-
- iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
-
- iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
- iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
- iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
-
-#undef iwmmx_mbuiltin
-#undef iwmmx2_mbuiltin
-}
-
static void
arm_init_fp16_builtins (void)
{
@@ -2454,9 +1693,6 @@ arm_init_fp16_builtins (void)
void
arm_init_builtins (void)
{
- if (TARGET_REALLY_IWMMXT)
- arm_init_iwmmxt_builtins ();
-
/* This creates the arm_simd_floatHF_type_node so must come before
arm_init_neon_builtins which uses it. */
arm_init_fp16_builtins ();
@@ -2546,15 +1782,11 @@ arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
clear instructions. */
static rtx
-safe_vector_operand (rtx x, machine_mode mode)
+safe_vector_operand (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
{
if (x != const0_rtx)
return x;
- x = gen_reg_rtx (mode);
-
- emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
- : gen_rtx_SUBREG (DImode, x, 0)));
- return x;
+ __builtin_unreachable ();
}
/* Function to expand ternary builtins. */
@@ -3053,8 +2285,7 @@ constant_arg:
builtin and error out if not. */
start_sequence ();
emit_insn (pat);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
if (recog_memoized (insn) < 0)
error ("this builtin is not supported for this target");
@@ -3266,21 +2497,10 @@ arm_general_expand_builtin (unsigned int fcode,
const struct builtin_description * d;
enum insn_code icode;
tree arg0;
- tree arg1;
- tree arg2;
rtx op0;
rtx op1;
- rtx op2;
rtx pat;
size_t i;
- machine_mode tmode;
- machine_mode mode0;
- machine_mode mode1;
- machine_mode mode2;
- int opint;
- int selector;
- int mask;
- int imm;
if (fcode == ARM_BUILTIN_SIMD_LANE_CHECK)
{
@@ -3369,499 +2589,6 @@ arm_general_expand_builtin (unsigned int fcode,
emit_insn (gen_cstoresi4 (target, op1, target, const0_rtx));
return target;
- case ARM_BUILTIN_TEXTRMSB:
- case ARM_BUILTIN_TEXTRMUB:
- case ARM_BUILTIN_TEXTRMSH:
- case ARM_BUILTIN_TEXTRMUH:
- case ARM_BUILTIN_TEXTRMSW:
- case ARM_BUILTIN_TEXTRMUW:
- icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
- : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
- : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
- : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
- : CODE_FOR_iwmmxt_textrmw);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return gen_reg_rtx (tmode);
- }
-
- opint = INTVAL (op1);
- if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
- {
- if (opint > 7 || opint < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
- {
- if (opint > 3 || opint < 0)
- error ("the range of selector should be in 0 to 3");
- }
- else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
- {
- if (opint > 1 || opint < 0)
- error ("the range of selector should be in 0 to 1");
- }
-
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WALIGNI:
- /* If op2 is immediate, call walighi, else call walighr. */
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (CONST_INT_P (op2))
- {
- icode = CODE_FOR_iwmmxt_waligni;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else
- {
- icode = CODE_FOR_iwmmxt_walignr;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_TINSRB:
- case ARM_BUILTIN_TINSRH:
- case ARM_BUILTIN_TINSRW:
- case ARM_BUILTIN_WMERGE:
- icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
- : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
- : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
- : CODE_FOR_iwmmxt_tinsrw);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- {
- error ("selector must be an immediate");
- return const0_rtx;
- }
- if (icode == CODE_FOR_iwmmxt_wmerge)
- {
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- if ((icode == CODE_FOR_iwmmxt_tinsrb)
- || (icode == CODE_FOR_iwmmxt_tinsrh)
- || (icode == CODE_FOR_iwmmxt_tinsrw))
- {
- mask = 0x01;
- selector= INTVAL (op2);
- if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
- error ("the range of selector should be in 0 to 7");
- else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
- error ("the range of selector should be in 0 to 3");
- else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
- error ("the range of selector should be in 0 to 1");
- mask <<= selector;
- op2 = GEN_INT (mask);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_SETWCGR0:
- case ARM_BUILTIN_SETWCGR1:
- case ARM_BUILTIN_SETWCGR2:
- case ARM_BUILTIN_SETWCGR3:
- icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
- : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
- : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
- : CODE_FOR_iwmmxt_setwcgr3);
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- mode0 = insn_data[icode].operand[0].mode;
- if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- pat = GEN_FCN (icode) (op0);
- if (!pat)
- return 0;
- emit_insn (pat);
- return 0;
-
- case ARM_BUILTIN_GETWCGR0:
- case ARM_BUILTIN_GETWCGR1:
- case ARM_BUILTIN_GETWCGR2:
- case ARM_BUILTIN_GETWCGR3:
- icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
- : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
- : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
- : CODE_FOR_iwmmxt_getwcgr3);
- tmode = insn_data[icode].operand[0].mode;
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WSHUFH:
- icode = CODE_FOR_iwmmxt_wshufh;
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- error ("mask must be an immediate");
- return const0_rtx;
- }
- selector = INTVAL (op1);
- if (selector < 0 || selector > 255)
- error ("the range of mask should be in 0 to 255");
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WMADDS:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
- case ARM_BUILTIN_WMADDSX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
- case ARM_BUILTIN_WMADDSN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
- case ARM_BUILTIN_WMADDU:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
- case ARM_BUILTIN_WMADDUX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
- case ARM_BUILTIN_WMADDUN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
- case ARM_BUILTIN_WSADBZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
- case ARM_BUILTIN_WSADHZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
-
- /* Several three-argument builtins. */
- case ARM_BUILTIN_WMACS:
- case ARM_BUILTIN_WMACU:
- case ARM_BUILTIN_TMIA:
- case ARM_BUILTIN_TMIAPH:
- case ARM_BUILTIN_TMIATT:
- case ARM_BUILTIN_TMIATB:
- case ARM_BUILTIN_TMIABT:
- case ARM_BUILTIN_TMIABB:
- case ARM_BUILTIN_WQMIABB:
- case ARM_BUILTIN_WQMIABT:
- case ARM_BUILTIN_WQMIATB:
- case ARM_BUILTIN_WQMIATT:
- case ARM_BUILTIN_WQMIABBN:
- case ARM_BUILTIN_WQMIABTN:
- case ARM_BUILTIN_WQMIATBN:
- case ARM_BUILTIN_WQMIATTN:
- case ARM_BUILTIN_WMIABB:
- case ARM_BUILTIN_WMIABT:
- case ARM_BUILTIN_WMIATB:
- case ARM_BUILTIN_WMIATT:
- case ARM_BUILTIN_WMIABBN:
- case ARM_BUILTIN_WMIABTN:
- case ARM_BUILTIN_WMIATBN:
- case ARM_BUILTIN_WMIATTN:
- case ARM_BUILTIN_WMIAWBB:
- case ARM_BUILTIN_WMIAWBT:
- case ARM_BUILTIN_WMIAWTB:
- case ARM_BUILTIN_WMIAWTT:
- case ARM_BUILTIN_WMIAWBBN:
- case ARM_BUILTIN_WMIAWBTN:
- case ARM_BUILTIN_WMIAWTBN:
- case ARM_BUILTIN_WMIAWTTN:
- case ARM_BUILTIN_WSADB:
- case ARM_BUILTIN_WSADH:
- icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
- : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
- : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
- : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
- : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
- : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
- : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
- : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
- : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
- : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
- : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
- : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
- : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
- : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
- : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
- : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
- : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
- : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
- : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
- : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
- : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
- : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
- : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
- : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
- : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
- : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
- : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
- : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
- : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
- : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
- : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
- : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
- : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
- : CODE_FOR_iwmmxt_wsadh);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WZERO:
- target = gen_reg_rtx (DImode);
- emit_insn (gen_iwmmxt_clrdi (target));
- return target;
-
- case ARM_BUILTIN_WSRLHI:
- case ARM_BUILTIN_WSRLWI:
- case ARM_BUILTIN_WSRLDI:
- case ARM_BUILTIN_WSLLHI:
- case ARM_BUILTIN_WSLLWI:
- case ARM_BUILTIN_WSLLDI:
- case ARM_BUILTIN_WSRAHI:
- case ARM_BUILTIN_WSRAWI:
- case ARM_BUILTIN_WSRADI:
- case ARM_BUILTIN_WRORHI:
- case ARM_BUILTIN_WRORWI:
- case ARM_BUILTIN_WRORDI:
- case ARM_BUILTIN_WSRLH:
- case ARM_BUILTIN_WSRLW:
- case ARM_BUILTIN_WSRLD:
- case ARM_BUILTIN_WSLLH:
- case ARM_BUILTIN_WSLLW:
- case ARM_BUILTIN_WSLLD:
- case ARM_BUILTIN_WSRAH:
- case ARM_BUILTIN_WSRAW:
- case ARM_BUILTIN_WSRAD:
- case ARM_BUILTIN_WRORH:
- case ARM_BUILTIN_WRORW:
- case ARM_BUILTIN_WRORD:
- icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
- : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
- : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
- : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
- : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
- : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
- : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
- : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
- : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
- : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
- : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
- : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
- : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
- : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
- : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
- : CODE_FOR_nothing);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op1 = expand_normal (arg1);
- if (GET_MODE (op1) == VOIDmode)
- {
- imm = INTVAL (op1);
- if ((fcode == ARM_BUILTIN_WRORWI || fcode == ARM_BUILTIN_WRORW)
- && (imm < 0 || imm > 32))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORWI
- ? "_mm_rori_pi32" : "_mm_ror_pi32");
- error ("the range of count should be in 0 to 32; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORH)
- && (imm < 0 || imm > 16))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORHI
- ? "_mm_rori_pi16" : "_mm_ror_pi16");
- error ("the range of count should be in 0 to 16; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
- && (imm < 0 || imm > 64))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORDI
- ? "_mm_rori_si64" : "_mm_ror_si64");
- error ("the range of count should be in 0 to 64; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if (imm < 0)
- {
- const char *builtin;
- switch (fcode)
- {
- case ARM_BUILTIN_WSRLHI:
- builtin = "_mm_srli_pi16";
- break;
- case ARM_BUILTIN_WSRLWI:
- builtin = "_mm_srli_pi32";
- break;
- case ARM_BUILTIN_WSRLDI:
- builtin = "_mm_srli_si64";
- break;
- case ARM_BUILTIN_WSLLHI:
- builtin = "_mm_slli_pi16";
- break;
- case ARM_BUILTIN_WSLLWI:
- builtin = "_mm_slli_pi32";
- break;
- case ARM_BUILTIN_WSLLDI:
- builtin = "_mm_slli_si64";
- break;
- case ARM_BUILTIN_WSRAHI:
- builtin = "_mm_srai_pi16";
- break;
- case ARM_BUILTIN_WSRAWI:
- builtin = "_mm_srai_pi32";
- break;
- case ARM_BUILTIN_WSRADI:
- builtin = "_mm_srai_si64";
- break;
- case ARM_BUILTIN_WSRLH:
- builtin = "_mm_srl_pi16";
- break;
- case ARM_BUILTIN_WSRLW:
- builtin = "_mm_srl_pi32";
- break;
- case ARM_BUILTIN_WSRLD:
- builtin = "_mm_srl_si64";
- break;
- case ARM_BUILTIN_WSLLH:
- builtin = "_mm_sll_pi16";
- break;
- case ARM_BUILTIN_WSLLW:
- builtin = "_mm_sll_pi32";
- break;
- case ARM_BUILTIN_WSLLD:
- builtin = "_mm_sll_si64";
- break;
- case ARM_BUILTIN_WSRAH:
- builtin = "_mm_sra_pi16";
- break;
- case ARM_BUILTIN_WSRAW:
- builtin = "_mm_sra_si64";
- break;
- default:
- builtin = "_mm_sra_si64";
- break;
- }
- error ("the count should be no less than 0; "
- "please check the intrinsic %qs in code", builtin);
- }
- }
- return arm_expand_binop_builtin (icode, exp, target);
-
default:
break;
}
diff --git a/gcc/config/arm/arm-c.cc b/gcc/config/arm/arm-c.cc
index 15e4080..d257e62 100644
--- a/gcc/config/arm/arm-c.cc
+++ b/gcc/config/arm/arm-c.cc
@@ -373,13 +373,6 @@ arm_cpu_builtins (struct cpp_reader* pfile)
builtin_define (arm_arch_name);
if (arm_arch_xscale)
builtin_define ("__XSCALE__");
- if (arm_arch_iwmmxt)
- {
- builtin_define ("__IWMMXT__");
- builtin_define ("__ARM_WMMX");
- }
- if (arm_arch_iwmmxt2)
- builtin_define ("__IWMMXT2__");
/* ARMv6KZ was originally identified as the misspelled __ARM_ARCH_6ZK__. To
preserve the existing behavior, the misspelled feature macro must still be
defined. */
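
User code that feature-tested the removed predefines keeps compiling; it simply takes its scalar path now. A minimal user-side sketch (the helper and its fallback loop are illustrative, not from the patch):

#include <stdint.h>

/* Hypothetical user code: with __IWMMXT__ and __ARM_WMMX no longer
   predefined, the scalar branch below is now always selected.  */
void
add_bytes (uint8_t *d, const uint8_t *a, const uint8_t *b, int n)
{
#if defined (__IWMMXT__) || defined (__ARM_WMMX)
  /* Old path: Wireless MMX intrinsics (mmintrin.h).  */
#else
  for (int i = 0; i < n; i++)
    d[i] = a[i] + b[i];
#endif
}
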
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 1939d55..7f5a8c6 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -102,12 +102,6 @@ define feature armv8
# ARMv8 CRC32 instructions.
define feature crc32
-# XScale v2 (Wireless MMX).
-define feature iwmmxt
-
-# XScale Wireless MMX2.
-define feature iwmmxt2
-
# Architecture rel 8.1.
define feature armv8_1
@@ -778,18 +772,19 @@ begin arch armv9-a
option bf16 add bf16 FP_ARMv8 DOTPROD
end arch armv9-a
+# We no longer support the iwmmxt{,2} extensions, so treat these like xscale.
begin arch iwmmxt
- tune for iwmmxt
+ tune for xscale
tune flags LDSCHED STRONG XSCALE
base 5TE
- isa ARMv5te xscale iwmmxt
+ isa ARMv5te xscale
end arch iwmmxt
begin arch iwmmxt2
- tune for iwmmxt2
+ tune for xscale
tune flags LDSCHED STRONG XSCALE
base 5TE
- isa ARMv5te xscale iwmmxt iwmmxt2
+ isa ARMv5te xscale
end arch iwmmxt2
# CPU entries
@@ -924,23 +919,12 @@ end cpu arm10e
begin cpu xscale
tune flags LDSCHED XSCALE
+ alias iwmmxt iwmmxt2
architecture armv5te
isa xscale
costs xscale
end cpu xscale
-begin cpu iwmmxt
- tune flags LDSCHED XSCALE
- architecture iwmmxt
- costs xscale
-end cpu iwmmxt
-
-begin cpu iwmmxt2
- tune flags LDSCHED XSCALE
- architecture iwmmxt2
- costs xscale
-end cpu iwmmxt2
-
begin cpu fa606te
tune flags LDSCHED
architecture armv5te
diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md
index c270056..a8af0e6 100644
--- a/gcc/config/arm/arm-generic.md
+++ b/gcc/config/arm/arm-generic.md
@@ -96,14 +96,14 @@
(and (eq_attr "generic_sched" "yes")
(and (eq_attr "ldsched" "yes")
(and (eq_attr "type" "load_byte,load_4")
- (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
+ (eq_attr "tune" "xscale"))))
"core")
(define_insn_reservation "load_ldsched" 2
(and (eq_attr "generic_sched" "yes")
(and (eq_attr "ldsched" "yes")
(and (eq_attr "type" "load_byte,load_4")
- (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
+ (eq_attr "tune" "!xscale"))))
"core")
(define_insn_reservation "load_or_store" 2
diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
index 06a1939..5c543bf 100644
--- a/gcc/config/arm/arm-opts.h
+++ b/gcc/config/arm/arm-opts.h
@@ -46,7 +46,6 @@ enum arm_abi_type
ARM_ABI_APCS,
ARM_ABI_ATPCS,
ARM_ABI_AAPCS,
- ARM_ABI_IWMMXT,
ARM_ABI_AAPCS_LINUX
};
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 254c731..ff7e765 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -190,8 +190,6 @@ extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool);
extern void arm_set_return_address (rtx, rtx);
extern int arm_eliminable_register (rtx);
extern const char *arm_output_shift(rtx *, int);
-extern const char *arm_output_iwmmxt_shift_immediate (const char *, rtx *, bool);
-extern const char *arm_output_iwmmxt_tinsr (rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
extern int arm_attr_length_push_multi(rtx, rtx);
extern int arm_attr_length_pop_multi(rtx *, bool, bool);
@@ -475,12 +473,6 @@ extern int arm_ld_sched;
/* Nonzero if this chip is a StrongARM. */
extern int arm_tune_strongarm;
-/* Nonzero if this chip supports Intel Wireless MMX technology. */
-extern int arm_arch_iwmmxt;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-extern int arm_arch_iwmmxt2;
-
/* Nonzero if this chip is an XScale. */
extern int arm_arch_xscale;
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index db7767a..544de84 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -67,12 +67,6 @@ EnumValue
Enum(processor_type) String(xscale) Value( TARGET_CPU_xscale)
EnumValue
-Enum(processor_type) String(iwmmxt) Value( TARGET_CPU_iwmmxt)
-
-EnumValue
-Enum(processor_type) String(iwmmxt2) Value( TARGET_CPU_iwmmxt2)
-
-EnumValue
Enum(processor_type) String(fa606te) Value( TARGET_CPU_fa606te)
EnumValue
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index a04d1ee..20b5f93 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -25,31 +25,30 @@
fa526,fa626,arm7tdmi,
arm710t,arm9,arm9tdmi,
arm920t,arm10tdmi,arm9e,
- arm10e,xscale,iwmmxt,
- iwmmxt2,fa606te,fa626te,
- fmp626,fa726te,arm926ejs,
- arm1026ejs,arm1136js,arm1136jfs,
- arm1176jzs,arm1176jzfs,mpcorenovfp,
- mpcore,arm1156t2s,arm1156t2fs,
- cortexm1,cortexm0,cortexm0plus,
- cortexm1smallmultiply,cortexm0smallmultiply,cortexm0plussmallmultiply,
- genericv7a,cortexa5,cortexa7,
- cortexa8,cortexa9,cortexa12,
- cortexa15,cortexa17,cortexr4,
- cortexr4f,cortexr5,cortexr7,
- cortexr8,cortexm7,cortexm4,
- cortexm3,marvell_pj4,cortexa15cortexa7,
- cortexa17cortexa7,cortexa32,cortexa35,
- cortexa53,cortexa57,cortexa72,
- cortexa73,exynosm1,xgene1,
- cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,
- cortexa73cortexa53,cortexa55,cortexa75,
- cortexa76,cortexa76ae,cortexa77,
- cortexa78,cortexa78ae,cortexa78c,
- cortexa710,cortexx1,cortexx1c,
- neoversen1,cortexa75cortexa55,cortexa76cortexa55,
- neoversev1,neoversen2,cortexm23,
- cortexm33,cortexm35p,cortexm52,
- cortexm55,starmc1,cortexm85,
- cortexr52,cortexr52plus"
+ arm10e,xscale,fa606te,
+ fa626te,fmp626,fa726te,
+ arm926ejs,arm1026ejs,arm1136js,
+ arm1136jfs,arm1176jzs,arm1176jzfs,
+ mpcorenovfp,mpcore,arm1156t2s,
+ arm1156t2fs,cortexm1,cortexm0,
+ cortexm0plus,cortexm1smallmultiply,cortexm0smallmultiply,
+ cortexm0plussmallmultiply,genericv7a,cortexa5,
+ cortexa7,cortexa8,cortexa9,
+ cortexa12,cortexa15,cortexa17,
+ cortexr4,cortexr4f,cortexr5,
+ cortexr7,cortexr8,cortexm7,
+ cortexm4,cortexm3,marvell_pj4,
+ cortexa15cortexa7,cortexa17cortexa7,cortexa32,
+ cortexa35,cortexa53,cortexa57,
+ cortexa72,cortexa73,exynosm1,
+ xgene1,cortexa57cortexa53,cortexa72cortexa53,
+ cortexa73cortexa35,cortexa73cortexa53,cortexa55,
+ cortexa75,cortexa76,cortexa76ae,
+ cortexa77,cortexa78,cortexa78ae,
+ cortexa78c,cortexa710,cortexx1,
+ cortexx1c,neoversen1,cortexa75cortexa55,
+ cortexa76cortexa55,neoversev1,neoversen2,
+ cortexm23,cortexm33,cortexm35p,
+ cortexm52,cortexm55,starmc1,
+ cortexm85,cortexr52,cortexr52plus"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 670f487..bde06f3 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -948,12 +948,6 @@ int arm_ld_sched = 0;
/* Nonzero if this chip is a StrongARM. */
int arm_tune_strongarm = 0;
-/* Nonzero if this chip supports Intel Wireless MMX technology. */
-int arm_arch_iwmmxt = 0;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-int arm_arch_iwmmxt2 = 0;
-
/* Nonzero if this chip is an XScale. */
int arm_arch_xscale = 0;
@@ -2970,11 +2964,6 @@ arm_option_check_internal (struct gcc_options *opts)
{
int flags = opts->x_target_flags;
- /* iWMMXt and NEON are incompatible. */
- if (TARGET_IWMMXT
- && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
- error ("iWMMXt and NEON are incompatible");
-
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
if (TARGET_ARM_P (flags)
@@ -2997,10 +2986,6 @@ arm_option_check_internal (struct gcc_options *opts)
warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
"debugging");
- /* iWMMXt unsupported under Thumb mode. */
- if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
- error ("iWMMXt unsupported under Thumb mode");
-
if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
@@ -3928,8 +3913,6 @@ arm_option_reconfigure_globals (void)
arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
- arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
- arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
@@ -3997,12 +3980,6 @@ arm_options_perform_arch_sanity_checks (void)
if (arm_arch5t)
target_flags &= ~MASK_INTERWORK;
- if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
- error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
-
- if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
- error ("iwmmxt abi requires an iwmmxt capable cpu");
-
/* BPABI targets use linker tricks to allow interworking on cores
without thumb support. */
if (TARGET_INTERWORK
@@ -4043,9 +4020,7 @@ arm_options_perform_arch_sanity_checks (void)
if (TARGET_AAPCS_BASED)
{
- if (arm_abi == ARM_ABI_IWMMXT)
- arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
- else if (TARGET_HARD_FLOAT_ABI)
+ if (TARGET_HARD_FLOAT_ABI)
{
arm_pcs_default = ARM_PCS_AAPCS_VFP;
if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
@@ -4555,11 +4530,6 @@ use_return_insn (int iscond, rtx sibling)
if (reg_needs_saving_p (regno))
return 0;
- if (TARGET_REALLY_IWMMXT)
- for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
- if (reg_needs_saving_p (regno))
- return 0;
-
return 1;
}
@@ -6048,9 +6018,6 @@ arm_libcall_value_1 (machine_mode mode)
{
if (TARGET_AAPCS_BASED)
return aapcs_libcall_value (mode);
- else if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (mode))
- return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
else
return gen_rtx_REG (mode, ARG_REGISTER (1));
}
@@ -6083,9 +6050,7 @@ arm_function_value_regno_p (const unsigned int regno)
|| (TARGET_32BIT
&& TARGET_AAPCS_BASED
&& TARGET_HARD_FLOAT
- && regno == FIRST_VFP_REGNUM)
- || (TARGET_IWMMXT_ABI
- && regno == FIRST_IWMMXT_REGNUM))
+ && regno == FIRST_VFP_REGNUM))
return true;
return false;
@@ -6102,8 +6067,6 @@ arm_apply_result_size (void)
{
if (TARGET_HARD_FLOAT_ABI)
size += 32;
- if (TARGET_IWMMXT_ABI)
- size += 8;
}
return size;
@@ -6265,7 +6228,6 @@ const struct pcs_attribute_arg
#if 0
/* We could recognize these, but changes would be needed elsewhere
* to implement them. */
- {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
{"atpcs", ARM_PCS_ATPCS},
{"apcs", ARM_PCS_APCS},
#endif
@@ -7195,26 +7157,12 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
/* On the ARM, the offset starts at 0. */
pcum->nregs = 0;
- pcum->iwmmxt_nregs = 0;
pcum->can_split = true;
/* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named' */
pcum->named_count = 0;
pcum->nargs = 0;
-
- if (TARGET_REALLY_IWMMXT && fntype)
- {
- tree fn_arg;
-
- for (fn_arg = TYPE_ARG_TYPES (fntype);
- fn_arg;
- fn_arg = TREE_CHAIN (fn_arg))
- pcum->named_count += 1;
-
- if (! pcum->named_count)
- pcum->named_count = INT_MAX;
- }
}
/* Return 2 if double word alignment is required for argument passing,
@@ -7308,22 +7256,6 @@ arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
return pcum->aapcs_reg;
}
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (arg.mode)
- && pcum->named_count > pcum->nargs + 1)
- {
- if (pcum->iwmmxt_nregs <= 9)
- return gen_rtx_REG (arg.mode,
- pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
- else
- {
- pcum->can_split = false;
- return NULL_RTX;
- }
- }
-
/* Put doubleword aligned quantities in even register pairs. */
if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
{
@@ -7383,9 +7315,6 @@ arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
return pcum->aapcs_partial;
}
- if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
- return 0;
-
if (NUM_ARG_REGS > nregs
&& (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
&& pcum->can_split)
@@ -7422,12 +7351,7 @@ arm_function_arg_advance (cumulative_args_t pcum_v,
else
{
pcum->nargs += 1;
- if (arm_vector_mode_supported_p (arg.mode)
- && pcum->named_count > pcum->nargs
- && TARGET_IWMMXT_ABI)
- pcum->iwmmxt_nregs += 1;
- else
- pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
+ pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
}
}
@@ -8149,8 +8073,7 @@ require_pic_register (rtx pic_reg, bool compute_now)
else
arm_load_pic_register (0UL, pic_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
for (insn = seq; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
@@ -8906,12 +8829,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
-
if (GET_MODE_SIZE (mode) <= 4
&& ! (arm_arch4
&& (mode == HImode
@@ -8991,17 +8908,6 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
&& INTVAL (index) > -256
&& (INTVAL (index) & 3) == 0);
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- {
- /* For DImode assume values will usually live in core regs
- and only allow LDRD addressing modes. */
- if (!TARGET_LDRD || mode != DImode)
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
- }
-
/* For quad modes, we restrict the constant offset to be slightly less
than what the instruction format permits. We do this because for
quad mode moves, we will actually decompose them into two separate
@@ -9372,10 +9278,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- rtx_insn *insns = get_insns ();
- end_sequence ();
-
- return insns;
+ return end_sequence ();
}
static rtx
@@ -12463,11 +12366,6 @@ arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
|| (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
return 15;
- else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
- || (from != IWMMXT_REGS && to == IWMMXT_REGS))
- return 4;
- else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
- return 20;
else
return 2;
}
@@ -14993,8 +14891,6 @@ arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (!multiple_operation_profitable_p (false, count, 0))
{
- rtx seq;
-
start_sequence ();
for (i = 0; i < count; i++)
@@ -15003,10 +14899,7 @@ arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (wback_offset != 0)
emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
- seq = get_insns ();
- end_sequence ();
-
- return seq;
+ return end_sequence ();
}
result = gen_rtx_PARALLEL (VOIDmode,
@@ -15044,8 +14937,6 @@ arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (!multiple_operation_profitable_p (false, count, 0))
{
- rtx seq;
-
start_sequence ();
for (i = 0; i < count; i++)
@@ -15054,10 +14945,7 @@ arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (wback_offset != 0)
emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
- seq = get_insns ();
- end_sequence ();
-
- return seq;
+ return end_sequence ();
}
result = gen_rtx_PARALLEL (VOIDmode,
@@ -16211,14 +16099,16 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
case UNGT:
case UNGE:
case UNEQ:
- case LTGT:
return CCFPmode;
case LT:
case LE:
case GT:
case GE:
- return CCFPEmode;
+ case LTGT:
+ return (flag_finite_math_only
+ ? CCFPmode
+ : CCFPEmode);
default:
gcc_unreachable ();
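
The regrouping above only matters when NaNs are possible: ordered comparisons can raise the IEEE invalid-operation exception on a NaN operand, so they need the exception-aware CCFPEmode unless -ffinite-math-only lets the compiler assume no NaNs. A minimal source-level sketch of such a comparison:

/* Illustrative only: compiled normally this ordered compare selects
   CCFPEmode; with -ffinite-math-only it can use the quiet CCFPmode.  */
int
ordered_less (double a, double b)
{
  return a < b;
}
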
@@ -17581,8 +17471,7 @@ struct minipool_node
rtx value;
/* The mode of value. */
machine_mode mode;
- /* The size of the value. With iWMMXt enabled
- sizes > 4 also imply an alignment of 8-bytes. */
+ /* The size of the value. */
int fix_size;
};
@@ -18942,8 +18831,7 @@ cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
XVECEXP (par, 0, k++) = set;
emit_use (reg);
}
- use_seq = get_insns ();
- end_sequence ();
+ use_seq = end_sequence ();
emit_insn_after (use_seq, emit_insn (par));
}
@@ -18988,8 +18876,7 @@ cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
XVECEXP (par, 0, j) = clobber;
- use_seq = get_insns ();
- end_sequence ();
+ use_seq = end_sequence ();
emit_insn_after (use_seq, emit_insn (par));
}
@@ -19230,8 +19117,7 @@ cmse_nonsecure_call_inline_register_clear (void)
cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
NUM_ARG_REGS, ip_reg, clearing_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_insn_before (seq, insn);
/* The AAPCS requires the callee to widen integral types narrower
@@ -20245,9 +20131,7 @@ output_move_double (rtx *operands, bool emit, int *count)
}
else
{
- /* Use a single insn if we can.
- FIXME: IWMMXT allows offsets larger than ldrd can
- handle, fix these up with a pair of ldr. */
+ /* Use a single insn if we can. */
if (can_ldrd
&& (TARGET_THUMB2
|| !CONST_INT_P (otherops[2])
@@ -20272,9 +20156,7 @@ output_move_double (rtx *operands, bool emit, int *count)
}
else
{
- /* Use a single insn if we can.
- FIXME: IWMMXT allows offsets larger than ldrd can handle,
- fix these up with a pair of ldr. */
+ /* Use a single insn if we can. */
if (can_ldrd
&& (TARGET_THUMB2
|| !CONST_INT_P (otherops[2])
@@ -20512,8 +20394,6 @@ output_move_double (rtx *operands, bool emit, int *count)
otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
- /* IWMMXT allows offsets larger than strd can handle,
- fix these up with a pair of str. */
if (!TARGET_THUMB2
&& CONST_INT_P (otherops[2])
&& (INTVAL(otherops[2]) <= -256
@@ -21450,34 +21330,6 @@ arm_compute_save_core_reg_mask (void)
if (cfun->machine->lr_save_eliminated)
save_reg_mask &= ~ (1 << LR_REGNUM);
- if (TARGET_REALLY_IWMMXT
- && ((bit_count (save_reg_mask)
- + ARM_NUM_INTS (crtl->args.pretend_args_size +
- arm_compute_static_chain_stack_bytes())
- ) % 2) != 0)
- {
- /* The total number of registers that are going to be pushed
- onto the stack is odd. We need to ensure that the stack
- is 64-bit aligned before we start to save iWMMXt registers,
- and also before we start to create locals. (A local variable
- might be a double or long long which we will load/store using
- an iWMMXt instruction). Therefore we need to push another
- ARM register, so that the stack will be 64-bit aligned. We
- try to avoid using the arg registers (r0 -r3) as they might be
- used to pass values in a tail call. */
- for (reg = 4; reg <= 12; reg++)
- if ((save_reg_mask & (1 << reg)) == 0)
- break;
-
- if (reg <= 12)
- save_reg_mask |= (1 << reg);
- else
- {
- cfun->machine->sibcall_blocked = 1;
- save_reg_mask |= (1 << 3);
- }
- }
-
/* We may need to push an additional register for use initializing the
PIC base register. */
if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
@@ -21685,19 +21537,17 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
{
- /* There are three possible reasons for the IP register
- being saved. 1) a stack frame was created, in which case
- IP contains the old stack pointer, or 2) an ISR routine
- corrupted it, or 3) it was saved to align the stack on
- iWMMXt. In case 1, restore IP into SP, otherwise just
- restore IP. */
+ /* There are two possible reasons for the IP register being saved.
+ 1) a stack frame was created, in which case IP contains the old
+ stack pointer, or 2) an ISR routine corrupted it. In case 1,
+ restore IP into SP, otherwise just restore IP. */
if (frame_pointer_needed)
{
live_regs_mask &= ~ (1 << IP_REGNUM);
live_regs_mask |= (1 << SP_REGNUM);
}
else
- gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
+ gcc_assert (IS_INTERRUPT (func_type));
}
/* On some ARM architectures it is faster to use LDR rather than
@@ -23149,8 +22999,6 @@ arm_compute_frame_layout (void)
if (TARGET_32BIT)
{
- unsigned int regno;
-
offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
core_saved = bit_count (offsets->saved_regs_mask) * 4;
saved = core_saved;
@@ -23159,16 +23007,6 @@ arm_compute_frame_layout (void)
preserve that condition at any subroutine call. We also require the
soft frame pointer to be doubleword aligned. */
- if (TARGET_REALLY_IWMMXT)
- {
- /* Check for the call-saved iWMMXt registers. */
- for (regno = FIRST_IWMMXT_REGNUM;
- regno <= LAST_IWMMXT_REGNUM;
- regno++)
- if (reg_needs_saving_p (regno))
- saved += 8;
- }
-
func_type = arm_current_func_type ();
/* Space for saved VFP registers. */
if (! IS_VOLATILE (func_type)
@@ -23384,18 +23222,6 @@ arm_save_coproc_regs(void)
int saved_size = 0;
unsigned reg;
unsigned start_reg;
- rtx insn;
-
- if (TARGET_REALLY_IWMMXT)
- for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
- if (reg_needs_saving_p (reg))
- {
- insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
- insn = gen_rtx_MEM (V2SImode, insn);
- insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
- RTX_FRAME_RELATED_P (insn) = 1;
- saved_size += 8;
- }
if (TARGET_VFP_BASE)
{
@@ -24554,42 +24380,9 @@ arm_print_operand (FILE *stream, rtx x, int code)
return;
case 'U':
- if (!REG_P (x)
- || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
- || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
- /* Bad value for wCG register number. */
- {
- output_operand_lossage ("invalid operand for code '%c'", code);
- return;
- }
-
- else
- fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
- return;
-
- /* Print an iWMMXt control register name. */
case 'w':
- if (!CONST_INT_P (x)
- || INTVAL (x) < 0
- || INTVAL (x) >= 16)
- /* Bad value for wC register number. */
- {
- output_operand_lossage ("invalid operand for code '%c'", code);
- return;
- }
-
- else
- {
- static const char * wc_reg_names [16] =
- {
- "wCID", "wCon", "wCSSF", "wCASF",
- "wC4", "wC5", "wC6", "wC7",
- "wCGR0", "wCGR1", "wCGR2", "wCGR3",
- "wC12", "wC13", "wC14", "wC15"
- };
-
- fputs (wc_reg_names [INTVAL (x)], stream);
- }
+ /* Former iWMMXT support, removed after GCC-15. */
+ output_operand_lossage ("obsolete iWMMXT format code '%c'", code);
return;
/* Print the high single-precision register of a VFP double-precision
@@ -25924,15 +25717,6 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return false;
}
- if (TARGET_REALLY_IWMMXT)
- {
- if (IS_IWMMXT_GR_REGNUM (regno))
- return mode == SImode;
-
- if (IS_IWMMXT_REGNUM (regno))
- return VALID_IWMMXT_REG_MODE (mode);
- }
-
/* We allow almost any value to be stored in the general registers.
Restrict doubleword quantities to even register pairs in ARM state
so that we can use ldrd. The same restriction applies for MVE
@@ -26038,12 +25822,6 @@ arm_regno_class (int regno)
return VFP_HI_REGS;
}
- if (IS_IWMMXT_REGNUM (regno))
- return IWMMXT_REGS;
-
- if (IS_IWMMXT_GR_REGNUM (regno))
- return IWMMXT_GR_REGS;
-
return NO_REGS;
}
@@ -27961,27 +27739,6 @@ arm_expand_epilogue_apcs_frame (bool really_return)
gen_rtx_REG (SImode, IP_REGNUM));
}
- if (TARGET_IWMMXT)
- {
- /* The frame pointer is guaranteed to be non-double-word aligned, as
- it is set to double-word-aligned old_stack_pointer - 4. */
- rtx_insn *insn;
- int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
-
- for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
- if (reg_needs_saving_p (i))
- {
- rtx addr = gen_frame_mem (V2SImode,
- plus_constant (Pmode, hard_frame_pointer_rtx,
- - lrm_count * 4));
- insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
- REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
- gen_rtx_REG (V2SImode, i),
- NULL_RTX);
- lrm_count += 2;
- }
- }
-
/* saved_regs_mask should contain IP which contains old stack pointer
at the time of activation creation. Since SP and IP are adjacent registers,
we can restore the value directly into SP. */
@@ -28194,23 +27951,6 @@ arm_expand_epilogue (bool really_return)
stack_pointer_rtx);
}
- if (TARGET_IWMMXT)
- for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
- if (reg_needs_saving_p (i))
- {
- rtx_insn *insn;
- rtx addr = gen_rtx_MEM (V2SImode,
- gen_rtx_POST_INC (SImode,
- stack_pointer_rtx));
- set_mem_alias_set (addr, get_frame_alias_set ());
- insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
- REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
- gen_rtx_REG (V2SImode, i),
- NULL_RTX);
- arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
- stack_pointer_rtx, stack_pointer_rtx);
- }
-
if (saved_regs_mask)
{
rtx insn;
@@ -29851,7 +29591,7 @@ arm_vector_mode_supported_p (machine_mode mode)
|| mode == V8BFmode))
return true;
- if ((TARGET_NEON || TARGET_IWMMXT)
+ if (TARGET_NEON
&& ((mode == V2SImode)
|| (mode == V4HImode)
|| (mode == V8QImode)))
@@ -29943,19 +29683,6 @@ arm_preferred_simd_mode (scalar_mode mode)
default:;
}
- if (TARGET_REALLY_IWMMXT)
- switch (mode)
- {
- case E_SImode:
- return V2SImode;
- case E_HImode:
- return V4HImode;
- case E_QImode:
- return V8QImode;
-
- default:;
- }
-
if (TARGET_HAVE_MVE)
switch (mode)
{
@@ -30037,12 +29764,6 @@ arm_debugger_regno (unsigned int regno)
return 256 + (regno - FIRST_VFP_REGNUM) / 2;
}
- if (IS_IWMMXT_GR_REGNUM (regno))
- return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
-
- if (IS_IWMMXT_REGNUM (regno))
- return 112 + regno - FIRST_IWMMXT_REGNUM;
-
if (IS_PAC_REGNUM (regno))
return DWARF_PAC_REGNUM;
@@ -30629,95 +30350,6 @@ arm_output_shift(rtx * operands, int set_flags)
return "";
}
-/* Output assembly for a WMMX immediate shift instruction. */
-const char *
-arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
-{
- int shift = INTVAL (operands[2]);
- char templ[50];
- machine_mode opmode = GET_MODE (operands[0]);
-
- gcc_assert (shift >= 0);
-
- /* If the shift value in the register versions is > 63 (for D qualifier),
- 31 (for W qualifier) or 15 (for H qualifier). */
- if (((opmode == V4HImode) && (shift > 15))
- || ((opmode == V2SImode) && (shift > 31))
- || ((opmode == DImode) && (shift > 63)))
- {
- if (wror_or_wsra)
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- if (opmode == DImode)
- {
- sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- }
- }
- else
- {
- /* The destination register will contain all zeros. */
- sprintf (templ, "wzero\t%%0");
- output_asm_insn (templ, operands);
- }
- return "";
- }
-
- if ((opmode == DImode) && (shift > 32))
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
- output_asm_insn (templ, operands);
- }
- else
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
- output_asm_insn (templ, operands);
- }
- return "";
-}
-
-/* Output assembly for a WMMX tinsr instruction. */
-const char *
-arm_output_iwmmxt_tinsr (rtx *operands)
-{
- int mask = INTVAL (operands[3]);
- int i;
- char templ[50];
- int units = mode_nunits[GET_MODE (operands[0])];
- gcc_assert ((mask & (mask - 1)) == 0);
- for (i = 0; i < units; ++i)
- {
- if ((mask & 0x01) == 1)
- {
- break;
- }
- mask >>= 1;
- }
- gcc_assert (i < units);
- {
- switch (GET_MODE (operands[0]))
- {
- case E_V8QImode:
- sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
- break;
- case E_V4HImode:
- sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
- break;
- case E_V2SImode:
- sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
- break;
- default:
- gcc_unreachable ();
- break;
- }
- output_asm_insn (templ, operands);
- }
- return "";
-}
-
/* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
Responsible for the handling of switch statements in arm. */
const char *
@@ -31090,26 +30722,6 @@ arm_conditional_register_usage (void)
fixed_regs[VPR_REGNUM] = 0;
}
- if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
- {
- regno = FIRST_IWMMXT_GR_REGNUM;
- /* The 2002/10/09 revision of the XScale ABI has wCG0
- and wCG1 as call-preserved registers. The 2002/11/21
- revision changed this so that all wCG registers are
- scratch registers. */
- for (regno = FIRST_IWMMXT_GR_REGNUM;
- regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
- fixed_regs[regno] = 0;
- /* The XScale ABI has wR0 - wR9 as scratch registers,
- the rest as call-preserved registers. */
- for (regno = FIRST_IWMMXT_REGNUM;
- regno <= LAST_IWMMXT_REGNUM; ++ regno)
- {
- fixed_regs[regno] = 0;
- call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
- }
- }
-
if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
{
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
@@ -35959,8 +35571,7 @@ arm_attempt_dlstp_transform (rtx label)
emit_insn (PATTERN (insn));
}
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
/* Re-write the entire BB contents with the transformed
sequence. */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8472b75..2e9d678 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -137,13 +137,6 @@ emission of floating point pcs attributes. */
#define TARGET_MAYBE_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT)
/* Use hardware floating point calling convention. */
#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD)
-#define TARGET_IWMMXT (arm_arch_iwmmxt)
-#define TARGET_IWMMXT2 (arm_arch_iwmmxt2)
-#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT \
- && !TARGET_GENERAL_REGS_ONLY)
-#define TARGET_REALLY_IWMMXT2 (TARGET_IWMMXT2 && TARGET_32BIT \
- && !TARGET_GENERAL_REGS_ONLY)
-#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT)
#define TARGET_ARM (! TARGET_THUMB)
#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */
#define TARGET_BACKTRACE (crtl->is_leaf \
@@ -526,12 +519,6 @@ extern int arm_ld_sched;
/* Nonzero if this chip is a StrongARM. */
extern int arm_tune_strongarm;
-/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */
-extern int arm_arch_iwmmxt;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-extern int arm_arch_iwmmxt2;
-
/* Nonzero if this chip is an XScale. */
extern int arm_arch_xscale;
@@ -855,10 +842,6 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
- /* IWMMXT regs. */ \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1, \
/* Specials. */ \
1,1,1,1,1,1,1,1 \
}
@@ -885,10 +868,6 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
- /* IWMMXT regs. */ \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1, \
/* Specials. */ \
1,1,1,1,1,1,1,1 \
}
@@ -1010,23 +989,11 @@ extern const int arm_arch_cde_coproc_bits[];
/* Register to use for pushing function arguments. */
#define STACK_POINTER_REGNUM SP_REGNUM
-#define FIRST_IWMMXT_REGNUM (LAST_HI_VFP_REGNUM + 1)
-#define LAST_IWMMXT_REGNUM (FIRST_IWMMXT_REGNUM + 15)
-
-/* Need to sync with WCGR in iwmmxt.md. */
-#define FIRST_IWMMXT_GR_REGNUM (LAST_IWMMXT_REGNUM + 1)
-#define LAST_IWMMXT_GR_REGNUM (FIRST_IWMMXT_GR_REGNUM + 3)
-
-#define IS_IWMMXT_REGNUM(REGNUM) \
- (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM))
-#define IS_IWMMXT_GR_REGNUM(REGNUM) \
- (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM))
-
/* Base register for access to local variables of the function. */
-#define FRAME_POINTER_REGNUM 102
+#define FRAME_POINTER_REGNUM (CC_REGNUM + 2)
/* Base register for access to arguments of the function. */
-#define ARG_POINTER_REGNUM 103
+#define ARG_POINTER_REGNUM (FRAME_POINTER_REGNUM + 1)
#define FIRST_VFP_REGNUM 16
#define D7_VFP_REGNUM (FIRST_VFP_REGNUM + 15)
@@ -1067,9 +1034,8 @@ extern const int arm_arch_cde_coproc_bits[];
/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP
+ 1 APSRQ + 1 APSRGE + 1 VPR + 1 Pseudo register to save PAC. */
-/* Intel Wireless MMX Technology registers add 16 + 4 more. */
/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */
-#define FIRST_PSEUDO_REGISTER 108
+#define FIRST_PSEUDO_REGISTER 88
#define DWARF_PAC_REGNUM 143
@@ -1086,9 +1052,6 @@ extern const int arm_arch_cde_coproc_bits[];
#define SUBTARGET_FRAME_POINTER_REQUIRED 0
#endif
-#define VALID_IWMMXT_REG_MODE(MODE) \
- (arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
-
/* Modes valid for Neon D registers. */
#define VALID_NEON_DREG_MODE(MODE) \
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
@@ -1168,9 +1131,9 @@ extern const int arm_arch_cde_coproc_bits[];
/* The conditions under which vector modes are supported for general
arithmetic by any vector extension. */
-#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH)
+#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH)
+#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH)
#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE)
#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE)
@@ -1204,9 +1167,9 @@ extern const int arm_arch_cde_coproc_bits[];
/* The conditions under which vector modes are supported by load/store
instructions by any vector extension. */
-#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST)
+#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST)
+#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST)
#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
@@ -1238,8 +1201,6 @@ extern int arm_regs_in_sequence[];
function. */
#define VREG(X) (FIRST_VFP_REGNUM + (X))
-#define WREG(X) (FIRST_IWMMXT_REGNUM + (X))
-#define WGREG(X) (FIRST_IWMMXT_GR_REGNUM + (X))
#define REG_ALLOC_ORDER \
{ \
@@ -1265,12 +1226,6 @@ extern int arm_regs_in_sequence[];
VREG(20), VREG(21), VREG(22), VREG(23), \
VREG(24), VREG(25), VREG(26), VREG(27), \
VREG(28), VREG(29), VREG(30), VREG(31), \
- /* IWMMX registers. */ \
- WREG(0), WREG(1), WREG(2), WREG(3), \
- WREG(4), WREG(5), WREG(6), WREG(7), \
- WREG(8), WREG(9), WREG(10), WREG(11), \
- WREG(12), WREG(13), WREG(14), WREG(15), \
- WGREG(0), WGREG(1), WGREG(2), WGREG(3), \
/* Registers not for general use. */ \
CC_REGNUM, VFPCC_REGNUM, \
FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \
@@ -1315,8 +1270,6 @@ enum reg_class
VFP_LO_REGS,
VFP_HI_REGS,
VFP_REGS,
- IWMMXT_REGS,
- IWMMXT_GR_REGS,
CC_REG,
VFPCC_REG,
SFP_REG,
@@ -1346,8 +1299,6 @@ enum reg_class
"VFP_LO_REGS", \
"VFP_HI_REGS", \
"VFP_REGS", \
- "IWMMXT_REGS", \
- "IWMMXT_GR_REGS", \
"CC_REG", \
"VFPCC_REG", \
"SFP_REG", \
@@ -1363,29 +1314,27 @@ enum reg_class
of length N_REG_CLASSES. */
#define REG_CLASS_CONTENTS \
{ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
- { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \
- { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
- { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
- { 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
- { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
- { 0x00005555, 0x00000000, 0x00000000, 0x00000000 }, /* EVEN_REGS. */ \
- { 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
- { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
- { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
- { 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000 }, /* VFP_LO_REGS */ \
- { 0x00000000, 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_HI_REGS */ \
- { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF, 0x00000000 }, /* VFP_REGS */ \
- { 0x00000000, 0x00000000, 0xFFFF0000, 0x00000000 }, /* IWMMXT_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x0000000F }, /* IWMMXT_GR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000010 }, /* CC_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000020 }, /* VFPCC_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000040 }, /* SFP_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000080 }, /* AFP_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000400 }, /* VPR_REG. */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000800 }, /* PAC_REG. */ \
- { 0x00005FFF, 0x00000000, 0x00000000, 0x00000400 }, /* GENERAL_AND_VPR_REGS. */ \
- { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000040F } /* ALL_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x000000FF, 0x00000000, 0x00000000 }, /* LO_REGS */ \
+ { 0x00002000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
+ { 0x000020FF, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
+ { 0x00005F00, 0x00000000, 0x00000000 }, /* HI_REGS */ \
+ { 0x0000100F, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
+ { 0x00005555, 0x00000000, 0x00000000 }, /* EVEN_REGS. */ \
+ { 0x00005FFF, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
+ { 0x00007FFF, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
+ { 0xFFFF0000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
+ { 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0xFFFF0000, 0x0000FFFF }, /* VFP_HI_REGS */ \
+ { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF }, /* VFP_REGS */ \
+ { 0x00000000, 0x00000000, 0x00010000 }, /* CC_REG */ \
+ { 0x00000000, 0x00000000, 0x00020000 }, /* VFPCC_REG */ \
+ { 0x00000000, 0x00000000, 0x00040000 }, /* SFP_REG */ \
+ { 0x00000000, 0x00000000, 0x00080000 }, /* AFP_REG */ \
+ { 0x00000000, 0x00000000, 0x00400000 }, /* VPR_REG. */ \
+ { 0x00000000, 0x00000000, 0x00800000 }, /* PAC_REG. */ \
+ { 0x00005FFF, 0x00000000, 0x00400000 }, /* GENERAL_AND_VPR_REGS. */ \
+ { 0xFFFF7FFF, 0xFFFFFFFF, 0x0040FFFF } /* ALL_REGS. */ \
}
#define FP_SYSREGS \
@@ -1460,39 +1409,34 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS];
/* Return the register class of a scratch register needed to copy IN into
or out of a register in CLASS in MODE. If it can be done directly,
NO_REGS is returned. */
-#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
- /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
- ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
- ? coproc_secondary_reload_class (MODE, X, FALSE) \
- : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \
- ? coproc_secondary_reload_class (MODE, X, TRUE) \
- : TARGET_32BIT \
- ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
- ? GENERAL_REGS : NO_REGS) \
- : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP regs. */ \
+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) \
+ : (TARGET_32BIT \
+ ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+ ? GENERAL_REGS \
+ : NO_REGS) \
+ : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)))
/* If we need to load shorts byte-at-a-time, then we need a scratch. */
-#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
- /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
- ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
- ? coproc_secondary_reload_class (MODE, X, FALSE) : \
- (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \
- coproc_secondary_reload_class (MODE, X, TRUE) : \
- (TARGET_32BIT ? \
- (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \
- && CONSTANT_P (X)) \
- ? GENERAL_REGS : \
- (((MODE) == HImode && ! arm_arch4 \
- && (MEM_P (X) \
- || ((REG_P (X) || GET_CODE (X) == SUBREG) \
- && true_regnum (X) == -1))) \
- ? GENERAL_REGS : NO_REGS) \
- : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP regs. */ \
+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) \
+ : (TARGET_32BIT \
+ ? (((MODE) == HImode \
+ && ! arm_arch4 \
+ && (MEM_P (X) \
+ || ((REG_P (X) || GET_CODE (X) == SUBREG) \
+ && true_regnum (X) == -1))) \
+ ? GENERAL_REGS \
+ : NO_REGS) \
+ : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS.
- ARM regs are UNITS_PER_WORD bits.
- FIXME: Is this true for iWMMX? */
+ ARM regs are UNITS_PER_WORD bits. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
(CLASS == VPR_REG) \
? CEIL (GET_MODE_SIZE (MODE), 2) \
@@ -1672,7 +1616,6 @@ enum arm_pcs
{
ARM_PCS_AAPCS, /* Base standard AAPCS. */
ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */
- ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */
/* This must be the last AAPCS variant. */
ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */
ARM_PCS_ATPCS, /* ATPCS. */
@@ -1690,8 +1633,6 @@ typedef struct
{
/* This is the number of registers of arguments scanned so far. */
int nregs;
- /* This is the number of iWMMXt register arguments scanned so far. */
- int iwmmxt_nregs;
int named_count;
int nargs;
/* Which procedure call variant to use for this call. */
@@ -1739,9 +1680,7 @@ typedef struct
#define FUNCTION_ARG_REGNO_P(REGNO) \
(IN_RANGE ((REGNO), 0, 3) \
|| (TARGET_AAPCS_BASED && TARGET_HARD_FLOAT \
- && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
- || (TARGET_IWMMXT_ABI \
- && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
+ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)))
/* If your target environment doesn't prefix user functions with an
@@ -2257,7 +2196,11 @@ extern int making_const_table;
#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y)
-#define REVERSIBLE_CC_MODE(MODE) 1
+/* Floating-point modes cannot be reversed unless we don't care about
+ NaNs. */
+#define REVERSIBLE_CC_MODE(MODE) \
+ (flag_finite_math_only \
+ || !((MODE) == CCFPmode || (MODE) == CCFPEmode))
#define REVERSE_CONDITION(CODE,MODE) \
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 597ef67..5e5e112 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -37,12 +37,12 @@
(LR_REGNUM 14) ; Return address register
(PC_REGNUM 15) ; Program counter
(LAST_ARM_REGNUM 15) ;
- (CC_REGNUM 100) ; Condition code pseudo register
- (VFPCC_REGNUM 101) ; VFP Condition code pseudo register
- (APSRQ_REGNUM 104) ; Q bit pseudo register
- (APSRGE_REGNUM 105) ; GE bits pseudo register
- (VPR_REGNUM 106) ; Vector Predication Register - MVE register.
- (RA_AUTH_CODE 107) ; Pseudo register to save PAC.
+ (CC_REGNUM 80) ; Condition code pseudo register
+ (VFPCC_REGNUM 81) ; VFP Condition code pseudo register
+ (APSRQ_REGNUM 84) ; Q bit pseudo register
+ (APSRGE_REGNUM 85) ; GE bits pseudo register
+ (VPR_REGNUM 86) ; Vector Predication Register - MVE register.
+ (RA_AUTH_CODE 87) ; Pseudo register to save PAC.
]
)
;; 3rd operand to select_dominance_cc_mode
@@ -149,7 +149,7 @@
; This attribute is used to compute attribute "enabled",
; use type "any" to enable an alternative in all cases.
(define_attr "arch" "any, a, t, 32, t1, t2, v6,nov6, v6t2, \
- v8mb, fix_vlldm, iwmmxt, iwmmxt2, armv6_or_vfpv3, \
+ v8mb, fix_vlldm, armv6_or_vfpv3, \
neon, mve"
(const_string "any"))
@@ -197,10 +197,6 @@
(match_test "fix_vlldm"))
(const_string "yes")
- (and (eq_attr "arch" "iwmmxt2")
- (match_test "TARGET_REALLY_IWMMXT2"))
- (const_string "yes")
-
(and (eq_attr "arch" "armv6_or_vfpv3")
(match_test "arm_arch6 || TARGET_VFP3"))
(const_string "yes")
@@ -362,18 +358,7 @@
alus_ext, alus_imm, alus_sreg,\
alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\
logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\
- logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\
- wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\
- wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\
- wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\
- wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\
- wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\
- wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\
- wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\
- wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\
- wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\
- wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\
- wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge")
+ logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel")
(const_string "single")
(const_string "multi")))
@@ -435,7 +420,6 @@
(const_string "yes")
(const_string "no"))))
-(include "marvell-f-iwmmxt.md")
(include "arm-generic.md")
(include "arm926ejs.md")
(include "arm1020e.md")
@@ -2893,14 +2877,12 @@
;; Split DImode and, ior, xor operations. Simply perform the logical
;; operation on the upper and lower halves of the registers.
;; This is needed for atomic operations in arm_split_atomic_op.
-;; Avoid splitting IWMMXT instructions.
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(match_operator:DI 6 "logical_binary_operator"
[(match_operand:DI 1 "s_register_operand" "")
(match_operand:DI 2 "s_register_operand" "")]))]
- "TARGET_32BIT && reload_completed
- && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)]))
(set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))]
"
@@ -6345,7 +6327,6 @@
"TARGET_32BIT
&& !(TARGET_HARD_FLOAT)
&& !(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT)
- && !TARGET_IWMMXT
&& ( register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode))"
"*
@@ -6554,7 +6535,7 @@
(define_insn "*arm_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
(match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))]
- "TARGET_ARM && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
+ "TARGET_ARM && !TARGET_HARD_FLOAT
&& ( register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
@@ -13123,10 +13104,8 @@
[(set_attr "conds" "unconditional")
(set_attr "type" "nop")])
-;; Vector bits common to IWMMXT, Neon and MVE
+;; Vector bits common to Neon and MVE
(include "vec-common.md")
-;; Load the Intel Wireless Multimedia Extension patterns
-(include "iwmmxt.md")
;; Load the VFP co-processor patterns
(include "vfp.md")
;; Thumb-1 patterns
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 042cb54..d5eeeae 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -58,9 +58,6 @@ EnumValue
Enum(arm_abi_type) String(aapcs) Value(ARM_ABI_AAPCS)
EnumValue
-Enum(arm_abi_type) String(iwmmxt) Value(ARM_ABI_IWMMXT)
-
-EnumValue
Enum(arm_abi_type) String(aapcs-linux) Value(ARM_ABI_AAPCS_LINUX)
mabort-on-noreturn
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 9f1a37a..24743a8 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -19,11 +19,12 @@
;; <http://www.gnu.org/licenses/>.
;; The following register constraints have been used:
-;; - in ARM/Thumb-2 state: t, w, x, y, z
+;; - in ARM/Thumb-2 state: t, w, x
;; - in Thumb state: h, b
;; - in both states: l, c, k, q, Cs, Ts, US
;; In ARM state, 'l' is an alias for 'r'
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
+;; 'y' and 'z' were previously used for iWMMX registers (removed after gcc-15).
;; The following normal constraints have been used:
;; in ARM/Thumb-2 state: G, I, j, J, K, L, M
@@ -39,7 +40,7 @@
;; in all states: Pg
;; The following memory constraints have been used:
-;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
+;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
;; in ARM state: Uq
;; in Thumb state: Uu, Uw
;; in all states: Q
@@ -112,13 +113,6 @@
(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS"
"The VFP registers @code{d0}-@code{d7}.")
-(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS"
- "The Intel iWMMX co-processor registers.")
-
-(define_register_constraint "z"
- "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS"
- "The Intel iWMMX GR registers.")
-
(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS"
"In Thumb state the core registers @code{r0}-@code{r7}.")
@@ -478,12 +472,6 @@
? arm_coproc_mem_operand_no_writeback (op)
: neon_vector_mem_operand (op, 2, true)")))
-(define_memory_constraint "Uy"
- "@internal
- In ARM/Thumb-2 state a valid iWMMX load/store address."
- (and (match_code "mem")
- (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)")))
-
(define_memory_constraint "Un"
"@internal
In ARM/Thumb-2 state a valid address for Neon doubleword vector
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 743fe48..0c163ed 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -59,30 +59,25 @@
;; A list of modes which the VFP unit can handle
(define_mode_iterator SDF [(SF "") (DF "TARGET_VFP_DOUBLE")])
-;; Integer element sizes implemented by IWMMXT.
-(define_mode_iterator VMMX [V2SI V4HI V8QI])
-
-(define_mode_iterator VMMX2 [V4HI V2SI])
-
;; Integer element sizes for shifts.
(define_mode_iterator VSHFT [V4HI V2SI DI])
-;; Integer and float modes supported by Neon and IWMMXT.
+;; Integer and float modes supported by Neon.
(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
-;; Integer and float modes supported by Neon, IWMMXT and MVE.
+;; Integer and float modes supported by Neon and MVE.
(define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI])
-;; Integer and float modes supported by Neon and IWMMXT but not MVE.
+;; Integer and float modes supported by Neon but not MVE.
(define_mode_iterator VNINOTM1 [V2SI V4HI V8QI V2SF])
-;; Integer and float modes supported by Neon and IWMMXT, except V2DI.
+;; Integer and float modes supported by Neon, except V2DI.
(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
-;; Integer modes supported by Neon and IWMMXT
+;; Integer modes supported by Neon
(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI])
-;; Integer modes supported by Neon and IWMMXT, except V2DI
+;; Integer modes supported by Neon, except V2DI
(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
;; Double-width vector modes, on which we support arithmetic (no HF!)
@@ -1644,9 +1639,6 @@
;; distinguishes between 16-bit Thumb and 32-bit Thumb/ARM.
(define_mode_attr arch [(CC_Z "32") (SI "t1")])
-;; Determine element size suffix from vector mode.
-(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
-
;; vtbl<n> suffix for NEON vector modes.
(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")])
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
deleted file mode 100644
index 0aa5dcd..0000000
--- a/gcc/config/arm/iwmmxt.md
+++ /dev/null
@@ -1,1766 +0,0 @@
-;; Patterns for the Intel Wireless MMX technology architecture.
-;; Copyright (C) 2003-2025 Free Software Foundation, Inc.
-;; Contributed by Red Hat.
-
-;; This file is part of GCC.
-
-;; GCC is free software; you can redistribute it and/or modify it under
-;; the terms of the GNU General Public License as published by the Free
-;; Software Foundation; either version 3, or (at your option) any later
-;; version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-;; Register numbers. Need to sync with FIRST_IWMMXT_GR_REGNUM in arm.h
-(define_constants
- [(WCGR0 96)
- (WCGR1 97)
- (WCGR2 98)
- (WCGR3 99)
- ]
-)
-
-(define_insn "tbcstv8qi"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_duplicate:V8QI (match_operand:QI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcstb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "tbcstv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_duplicate:V4HI (match_operand:HI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcsth%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "tbcstv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_duplicate:V2SI (match_operand:SI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcstw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "iwmmxt_iordi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ior:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wor%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wor")]
-)
-
-(define_insn "iwmmxt_xordi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (xor:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_anddi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (and:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wand%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wand")]
-)
-
-(define_insn "iwmmxt_nanddi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (and:DI (match_operand:DI 1 "register_operand" "y")
- (not:DI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wandn%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wandn")]
-)
-
-(define_insn "*iwmmxt_arm_movdi"
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv")
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], DImode)
- || register_operand (operands[1], DImode))"
- "*
- switch (which_alternative)
- {
- case 0:
- case 1:
- case 2:
- return \"#\";
- case 3: case 4:
- return output_move_double (operands, true, NULL);
- case 5:
- return \"wmov%?\\t%0,%1\";
- case 6:
- return \"tmcrr%?\\t%0,%Q1,%R1\";
- case 7:
- return \"tmrrc%?\\t%Q0,%R0,%1\";
- case 8:
- return \"wldrd%?\\t%0,%1\";
- case 9:
- return \"wstrd%?\\t%1,%0\";
- case 10:
- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
- case 11:
- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
- case 12:
- if (TARGET_VFP_SINGLE)
- return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
- else
- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
- case 13: case 14:
- return output_move_vfp (operands);
- default:
- gcc_unreachable ();
- }
- "
- [(set (attr "length") (cond [(eq_attr "alternative" "0,3,4") (const_int 8)
- (eq_attr "alternative" "1") (const_int 12)
- (eq_attr "alternative" "2") (const_int 16)
- (eq_attr "alternative" "12")
- (if_then_else
- (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
- (const_int 8)
- (const_int 4))]
- (const_int 4)))
- (set_attr "type" "*,*,*,load_8,store_8,*,*,*,*,*,f_mcrr,f_mrrc,\
- ffarithd,f_loadd,f_stored")
- (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*")
- (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")]
-)
-
-(define_insn "*iwmmxt_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk, m,z,r,?z,?Uy,*t, r,*t,*t ,*Uv")
- (match_operand:SI 1 "general_operand" " rk,I,K,j,mi,rk,r,z,Uy, z, r,*t,*t,*Uvi, *t"))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
- "*
- switch (which_alternative)
- {
- case 0: return \"mov\\t%0, %1\";
- case 1: return \"mov\\t%0, %1\";
- case 2: return \"mvn\\t%0, #%B1\";
- case 3: return \"movw\\t%0, %1\";
- case 4: return \"ldr\\t%0, %1\";
- case 5: return \"str\\t%1, %0\";
- case 6: return \"tmcr\\t%0, %1\";
- case 7: return \"tmrc\\t%0, %1\";
- case 8: return arm_output_load_gr (operands);
- case 9: return \"wstrw\\t%1, %0\";
- case 10:return \"fmsr\\t%0, %1\";
- case 11:return \"fmrs\\t%0, %1\";
- case 12:return \"fcpys\\t%0, %1\\t%@ int\";
- case 13: case 14:
- return output_move_vfp (operands);
- default:
- gcc_unreachable ();
- }"
- [(set_attr "type" "*,*,*,*,load_4,store_4,*,*,*,*,f_mcr,f_mrc,\
- fmov,f_loads,f_stores")
- (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*")
- (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*")
- (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*")
- ;; Note - the "predicable" attribute is not allowed to have alternatives.
- ;; Since the wSTRw wCx instruction is not predicable, we cannot support
- ;; predicating any of the alternatives in this template. Instead,
- ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn.
- (set_attr "predicable" "no")
- ;; Also - we have to pretend that these insns clobber the condition code
- ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize
- ;; them.
- (set_attr "conds" "clob")]
-)
-
-;; Because iwmmxt_movsi_insn is not predicable, we provide the
-;; cond_exec version explicitly, with appropriate constraints.
-
-(define_insn "*cond_iwmmxt_movsi_insn"
- [(cond_exec
- (match_operator 2 "arm_comparison_operator"
- [(match_operand 3 "cc_register" "")
- (const_int 0)])
- (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r")
- (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
- "*
- switch (which_alternative)
- {
- case 0: return \"mov%?\\t%0, %1\";
- case 1: return \"mvn%?\\t%0, #%B1\";
- case 2: return \"ldr%?\\t%0, %1\";
- case 3: return \"str%?\\t%1, %0\";
- case 4: return \"tmcr%?\\t%0, %1\";
- default: return \"tmrc%?\\t%0, %1\";
- }"
- [(set_attr "type" "*,*,load_4,store_4,*,*")
- (set_attr "pool_range" "*,*,4096, *,*,*")
- (set_attr "neg_pool_range" "*,*,4084, *,*,*")]
-)
-
-(define_insn "mov<mode>_internal"
- [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m")
- (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0: return \"wmov%?\\t%0, %1\";
- case 1: return \"wstrd%?\\t%1, %0\";
- case 2: return \"wldrd%?\\t%0, %1\";
- case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
- case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
- case 5: return \"#\";
- default: return output_move_double (operands, true, NULL);
- }"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4, 4, 4,4,4,8, 8,8")
- (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load_4,store_4")
- (set_attr "pool_range" "*, *, 256,*,*,*, 256,*")
- (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")]
-)
-
-(define_expand "iwmmxt_setwcgr0"
- [(set (reg:SI WCGR0)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr1"
- [(set (reg:SI WCGR1)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr2"
- [(set (reg:SI WCGR2)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr3"
- [(set (reg:SI WCGR3)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr0"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR0))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr1"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR1))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr2"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR2))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr3"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR3))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_insn "*and<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (and:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wand\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wand")]
-)
-
-(define_insn "*ior<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (ior:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wor\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wor")]
-)
-
-(define_insn "*xor<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (xor:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wxor\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-
-;; Vector add/subtract
-
-(define_insn "*add<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (plus:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wadd<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddbss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddbus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "*sub<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (minus:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsub<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubbss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubbus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "*mulv4hi3_iwmmxt"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (mult:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmulul%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulsm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulum%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmacs"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))]
- "TARGET_REALLY_IWMMXT"
- "wmacs%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacsz"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))]
- "TARGET_REALLY_IWMMXT"
- "wmacsz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacu"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))]
- "TARGET_REALLY_IWMMXT"
- "wmacu%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacuz"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))]
- "TARGET_REALLY_IWMMXT"
- "wmacuz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-;; Same as xordi3, but don't show input operands so that we don't think
-;; they are live.
-(define_insn "iwmmxt_clrdi"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(const_int 0)] UNSPEC_CLRDI))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-;; Seems like cse likes to generate these, so we have to support them.
-
-(define_insn "iwmmxt_clrv8qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=y")
- (const_vector:V8QI [(const_int 0) (const_int 0)
- (const_int 0) (const_int 0)
- (const_int 0) (const_int 0)
- (const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_clrv4hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=y")
- (const_vector:V4HI [(const_int 0) (const_int 0)
- (const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_clrv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (const_vector:V2SI [(const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-;; Unsigned averages/sum of absolute differences
-
-(define_insn "iwmmxt_uavgrndv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (const_vector:V8HI [(const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)]))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2br%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgrndv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI
- (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_vector:V4SI [(const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)]))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2hr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (lshiftrt:V8HI
- (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2b%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2h%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-;; Insert/extract/shuffle
-
-(define_insn "iwmmxt_tinsrb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_duplicate:V8QI
- (truncate:QI (match_operand:SI 2 "nonimmediate_operand" "r")))
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_tinsrh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_duplicate:V4HI
- (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r")))
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_tinsrw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_duplicate:V2SI
- (match_operand:SI 2 "nonimmediate_operand" "r"))
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_textrmub"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmsb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmsb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmuh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmuh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmsh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-;; There are signed/unsigned variants of this instruction, but they are
-;; pointless.
-(define_insn "iwmmxt_textrmw"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
- "TARGET_REALLY_IWMMXT"
- "textrmsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_wshufh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))]
- "TARGET_REALLY_IWMMXT"
- "wshufh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wshufh")]
-)
-
-;; Mask-generating comparisons
-;;
-;; Note - you cannot use patterns like these here:
-;;
-;; (set (match:<vector>) (<comparator>:<vector> (match:<vector>) (match:<vector>)))
-;;
-;; Because GCC will assume that the truth value (1 or 0) is installed
-;; into the entire destination vector, (with the '1' going into the least
-;; significant element of the vector). This is not how these instructions
-;; behave.
-
-(define_insn "eqv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "eqv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "eqv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI
- [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "gtuv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtuv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtuh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtuv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtuw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-;; Max/min insns
-
-(define_insn "*smax<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (smax:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxs<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmax")]
-)
-
-(define_insn "*umax<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (umax:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxu<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmax")]
-)
-
-(define_insn "*smin<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (smin:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmins<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmin")]
-)
-
-(define_insn "*umin<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (umin:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminu<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmin")]
-)
-
-;; Pack/unpack insns.
-
-(define_insn "iwmmxt_wpackhss"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
- (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackwss"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_concat:V4HI
- (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
- (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackdss"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_concat:V2SI
- (ss_truncate:SI (match_operand:DI 1 "register_operand" "y"))
- (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackdss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackhus"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
- (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackwus"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_concat:V4HI
- (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
- (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackdus"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_concat:V2SI
- (us_truncate:SI (match_operand:DI 1 "register_operand" "y"))
- (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackdus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wunpckihb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel [(const_int 4)
- (const_int 0)
- (const_int 5)
- (const_int 1)
- (const_int 6)
- (const_int 2)
- (const_int 7)
- (const_int 3)]))
- (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)
- (const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)]))
- (const_int 85)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckihh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(const_int 2)
- (const_int 0)
- (const_int 3)
- (const_int 1)]))
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 1)
- (const_int 3)]))
- (const_int 5)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckihw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(const_int 1)
- (const_int 0)]))
- (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 1)]))
- (const_int 1)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckilb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)
- (const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)]))
- (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 4)
- (const_int 0)
- (const_int 5)
- (const_int 1)
- (const_int 6)
- (const_int 2)
- (const_int 7)
- (const_int 3)]))
- (const_int 85)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckilh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 1)
- (const_int 3)]))
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 2)
- (const_int 0)
- (const_int 3)
- (const_int 1)]))
- (const_int 5)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckilw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 1)]))
- (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
- (parallel [(const_int 1)
- (const_int 0)]))
- (const_int 1)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckehub"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehub%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehuh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehuh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehuw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehuw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsb"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckelub"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelub%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckeluh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckeluh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckeluw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckeluw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsb"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-;; Shifts
-
-(define_insn "ror<mode>3"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wror<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wror<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wror, wmmx_wror")]
-)
-
-(define_insn "ashr<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsra<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsra<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsra, wmmx_wsra")]
-)
-
-(define_insn "lshr<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsrl<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsrl<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
-)
-
-(define_insn "ashl<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsll<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsll<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsll, wmmx_wsll")]
-)
-
-(define_insn "ror<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wror<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wror<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wror, wmmx_wror")]
-)
-
-(define_insn "ashr<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsra<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsra<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsra, wmmx_wsra")]
-)
-
-(define_insn "lshr<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
-		   (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsrl<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsrl<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
-)
-
-(define_insn "ashl<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsll<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsll<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsll, wmmx_wsll")]
-)
-
-(define_insn "iwmmxt_wmadds"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmadds%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddu"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddu%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_tmia"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (match_operand:SI 2 "register_operand" "r"))
- (sign_extend:DI
- (match_operand:SI 3 "register_operand" "r")))))]
- "TARGET_REALLY_IWMMXT"
- "tmia%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmia")]
-)
-
-(define_insn "iwmmxt_tmiaph"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI (sign_extend:DI
- (truncate:HI (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI (match_operand:SI 3 "register_operand" "r"))))
- (mult:DI (sign_extend:DI
- (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16))))
- (sign_extend:DI
- (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiaph%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaph")]
-)
-
-(define_insn "iwmmxt_tmiabb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiatb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 2 "register_operand" "r")
- (const_int 16))))
- (sign_extend:DI
- (truncate:HI
- (match_operand:SI 3 "register_operand" "r"))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiabt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 3 "register_operand" "r")
- (const_int 16)))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiatt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 2 "register_operand" "r")
- (const_int 16))))
- (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 3 "register_operand" "r")
- (const_int 16)))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmovmskb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_tmovmskh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_tmovmskw"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_waccb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "waccb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-(define_insn "iwmmxt_wacch"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "wacch%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-(define_insn "iwmmxt_waccw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "waccw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-;; Use an unspec here to prevent 8 * imm from being optimized away by CSE.
-(define_insn "iwmmxt_waligni"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec:V8QI [(subreg:V8QI
- (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (match_operand:SI 3 "immediate_operand" "i")
- (const_int 8))) 0)] UNSPEC_WALIGNI))]
- "TARGET_REALLY_IWMMXT"
- "waligni%?\\t%0, %1, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waligni")]
-)
-
-(define_insn "iwmmxt_walignr"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (match_operand:SI 3 "register_operand" "z") (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr%U3%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr0"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR0) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr0%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr1"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR1) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr1%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr2"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR2) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr2%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR3) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr3%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_wsadb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V8QI 2 "register_operand" "y")
- (match_operand:V8QI 3 "register_operand" "y")] UNSPEC_WSAD))]
- "TARGET_REALLY_IWMMXT"
- "wsadb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WSAD))]
- "TARGET_REALLY_IWMMXT"
- "wsadh%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadbz"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))]
- "TARGET_REALLY_IWMMXT"
- "wsadbz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadhz"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))]
- "TARGET_REALLY_IWMMXT"
- "wsadhz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(include "iwmmxt2.md")
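As a reading aid (not part of the deleted file or of the patch): the waligni/walignr patterns above extract a 64-bit result from the 16-byte concatenation of two WMMX registers at a byte offset, and the imm * 8 term turns that byte offset into a bit shift, which is why the unspec is needed to keep CSE from folding the multiply. Below is a minimal scalar model in C, assuming little-endian lane ordering; the helper name is made up for illustration.

#include <stdint.h>

/* Scalar sketch of the WALIGNI extraction described by the RTL above:
   select bytes n .. n+7 of the concatenation {op2:op1}, i.e. shift the
   128-bit value right by 8*n bits and keep the low 64 bits.  n is the
   3-bit immediate, 0..7.  */
static inline uint64_t
waligni_model (uint64_t op1, uint64_t op2, unsigned int n)
{
  if (n == 0)
    return op1;  /* avoid an undefined shift by 64 */
  return (op1 >> (8 * n)) | (op2 << (64 - 8 * n));
}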
diff --git a/gcc/config/arm/iwmmxt2.md b/gcc/config/arm/iwmmxt2.md
deleted file mode 100644
index 74cd148..0000000
--- a/gcc/config/arm/iwmmxt2.md
+++ /dev/null
@@ -1,903 +0,0 @@
-;; Patterns for the Intel Wireless MMX technology architecture.
-;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
-;; Written by Marvell, Inc.
-;;
-;; This file is part of GCC.
-;;
-;; GCC is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published
-;; by the Free Software Foundation; either version 3, or (at your
-;; option) any later version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-(define_insn "iwmmxt_wabs<mode>3"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (unspec:VMMX [(match_operand:VMMX 1 "register_operand" "y")] UNSPEC_WABS))]
- "TARGET_REALLY_IWMMXT"
- "wabs<MMX_char>%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabs")]
-)
-
-(define_insn "iwmmxt_wabsdiffb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (abs:V8HI
- (minus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_wabsdiffh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
-	(truncate:V4HI
- (abs:V4SI
- (minus:V4SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_wabsdiffw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(truncate:V2SI
- (abs:V2DI
- (minus:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_waddsubhx"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (ss_minus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (ss_plus:V4HI
- (match_dup 1)
- (vec_select:V4HI (match_dup 2)
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (const_int 10)))]
- "TARGET_REALLY_IWMMXT"
- "waddsubhx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddsubhx")]
-)
-
-(define_insn "iwmmxt_wsubaddhx"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (ss_minus:V4HI
- (match_dup 1)
- (vec_select:V4HI (match_dup 2)
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (const_int 10)))]
- "TARGET_REALLY_IWMMXT"
- "wsubaddhx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsubaddhx")]
-)
-
-(define_insn "addc<mode>3"
- [(set (match_operand:VMMX2 0 "register_operand" "=y")
- (unspec:VMMX2
- [(plus:VMMX2
- (match_operand:VMMX2 1 "register_operand" "y")
- (match_operand:VMMX2 2 "register_operand" "y"))] UNSPEC_WADDC))]
- "TARGET_REALLY_IWMMXT"
- "wadd<MMX_char>c%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "iwmmxt_avg4"
-[(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (vec_select:V8HI
- (vec_merge:V8HI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 1))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 2))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
- (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
- (const_int 2))
- (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
- (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (const_int 254))
- (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4)
- (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))]
- "TARGET_REALLY_IWMMXT"
- "wavg4%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg4")]
-)
-
-(define_insn "iwmmxt_avg4r"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (vec_select:V8HI
- (vec_merge:V8HI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 1))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 2))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (const_vector:V8HI [(const_int 2) (const_int 2) (const_int 2) (const_int 2)
- (const_int 2) (const_int 2) (const_int 2) (const_int 2)]))
- (const_int 2))
- (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
- (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (const_int 254))
- (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4)
- (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))]
- "TARGET_REALLY_IWMMXT"
- "wavg4r%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg4")]
-)
-
-(define_insn "iwmmxt_wmaddsx"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddsx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddux"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddux%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddsn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (minus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddsn%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddun"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (minus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddun%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmulwsm"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (ashiftrt:V2DI
- (mult:V2DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwsm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulwum"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (lshiftrt:V2DI
- (mult:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwum%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulsmr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (ashiftrt:V4SI
- (plus:V4SI
- (mult:V4SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
-	    (const_vector:V4SI [(const_int 32768)
-				(const_int 32768)
-				(const_int 32768)
-				(const_int 32768)]))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulsmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulumr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI
- (mult:V4SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_vector:V4SI [(const_int 32768)
- (const_int 32768)
- (const_int 32768)
- (const_int 32768)]))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulumr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulwsmr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (ashiftrt:V2DI
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_vector:V2DI [(const_int 2147483648)
- (const_int 2147483648)]))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwsmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulwumr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (lshiftrt:V2DI
- (plus:V2DI
- (mult:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_vector:V2DI [(const_int 2147483648)
- (const_int 2147483648)]))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwumr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulwl"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (mult:V2SI
- (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmulwl%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wqmulm"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULM))]
- "TARGET_REALLY_IWMMXT"
- "wqmulm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulm")]
-)
-
-(define_insn "iwmmxt_wqmulwm"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWM))]
- "TARGET_REALLY_IWMMXT"
- "wqmulwm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulwm")]
-)
-
-(define_insn "iwmmxt_wqmulmr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULMR))]
- "TARGET_REALLY_IWMMXT"
- "wqmulmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulm")]
-)
-
-(define_insn "iwmmxt_wqmulwmr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWMR))]
- "TARGET_REALLY_IWMMXT"
- "wqmulwmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulwm")]
-)
-
-(define_insn "iwmmxt_waddbhusm"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (us_truncate:V4QI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (zero_extend:V4HI
- (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))))))]
- "TARGET_REALLY_IWMMXT"
- "waddbhusm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddbhus")]
-)
-
-(define_insn "iwmmxt_waddbhusl"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (zero_extend:V4HI
- (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))))
- (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "waddbhusl%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddbhus")]
-)
-
-(define_insn "iwmmxt_wqmiabb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabt"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatt"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabbn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabtn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatbn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiattn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiattn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabtn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiattn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiattn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiawbb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtt"
-[(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbtn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawttn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawttn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmerge"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ior:DI
- (ashift:DI
- (match_operand:DI 2 "register_operand" "y")
- (minus:SI
- (const_int 64)
- (mult:SI
- (match_operand:SI 3 "immediate_operand" "i")
- (const_int 8))))
- (lshiftrt:DI
- (ashift:DI
- (match_operand:DI 1 "register_operand" "y")
- (mult:SI
- (match_dup 3)
- (const_int 8)))
- (mult:SI
- (match_dup 3)
- (const_int 8)))))]
- "TARGET_REALLY_IWMMXT"
- "wmerge%?\\t%0, %1, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmerge")]
-)
-
-(define_insn "iwmmxt_tandc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TANDC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TANDC)]
- "TARGET_REALLY_IWMMXT"
- "tandc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tandc")]
-)
-
-(define_insn "iwmmxt_torc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TORC)]
- "TARGET_REALLY_IWMMXT"
- "torc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_torc")]
-)
-
-(define_insn "iwmmxt_torvsc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORVSC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TORVSC)]
- "TARGET_REALLY_IWMMXT"
- "torvsc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_torvsc")]
-)
-
-(define_insn "iwmmxt_textrc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)
- (match_operand:SI 0 "immediate_operand" "i")] UNSPEC_TEXTRC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TEXTRC)]
- "TARGET_REALLY_IWMMXT"
- "textrc<MMX_char>%?\\t r15, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrc")]
-)
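For reference (not from the patch): the wmulwsm/wmulwsmr patterns above keep the high half of a signed 32x32 multiply per lane, without and with rounding; the 2147483648 constant in the RTL is the 0x80000000 rounding bias. A minimal per-lane sketch in C follows; the helper names are made up for illustration.

#include <stdint.h>

/* High 32 bits of a signed 32x32->64 product (wmulwsm per lane).  */
static inline int32_t
wmulwsm_lane (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * b) >> 32);
}

/* Same, but rounded by adding 0x80000000 before the shift (wmulwsmr).  */
static inline int32_t
wmulwsmr_lane (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * b + 0x80000000LL) >> 32);
}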
diff --git a/gcc/config/arm/marvell-f-iwmmxt.md b/gcc/config/arm/marvell-f-iwmmxt.md
deleted file mode 100644
index c9c7b00..0000000
--- a/gcc/config/arm/marvell-f-iwmmxt.md
+++ /dev/null
@@ -1,189 +0,0 @@
-;; Marvell WMMX2 pipeline description
-;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
-;; Written by Marvell, Inc.
-
-;; This file is part of GCC.
-
-;; GCC is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published
-;; by the Free Software Foundation; either version 3, or (at your
-;; option) any later version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-
-(define_automaton "marvell_f_iwmmxt")
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Pipelines
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; This is a 7-stage pipeline:
-;;
-;; MD | MI | ME1 | ME2 | ME3 | ME4 | MW
-;;
-;; There are various bypasses modelled to a greater or lesser extent.
-;;
-;; Latencies in this file correspond to the number of cycles after
-;; the issue stage that it takes for the result of the instruction to
-;; be computed, or for its side-effects to occur.
-
-(define_cpu_unit "mf_iwmmxt_MD" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_MI" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME1" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME2" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME3" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME4" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_MW" "marvell_f_iwmmxt")
-
-(define_reservation "mf_iwmmxt_ME"
- "mf_iwmmxt_ME1,mf_iwmmxt_ME2,mf_iwmmxt_ME3,mf_iwmmxt_ME4"
-)
-
-(define_reservation "mf_iwmmxt_pipeline"
- "mf_iwmmxt_MD, mf_iwmmxt_MI, mf_iwmmxt_ME, mf_iwmmxt_MW"
-)
-
-;; An attribute to indicate whether our reservations are applicable.
-(define_attr "marvell_f_iwmmxt" "yes,no"
- (const (if_then_else (symbol_ref "arm_arch_iwmmxt")
- (const_string "yes") (const_string "no"))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; instruction classes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; An attribute appended to instructions for classification
-
-(define_attr "wmmxt_shift" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_pack" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\
- wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\
- wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_mult_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\
- wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\
- wmmx_wqmulwm")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_mult_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\
- wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\
- wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\
- wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\
- wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c3" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wsad")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\
- wmmx_tmcr, wmmx_tmcrr")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\
- wmmx_tmrc, wmmx_tmrrc")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c3" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy")
- (const_string "yes") (const_string "no"))
-)
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Main description
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c1" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_pack" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_pack" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_shift" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_shift" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c1" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c2" 5
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c2" 2
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c3" 3
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c3" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c3" 4
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c3" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_mult_c1" 4
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_mult_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-;; There is a forwarding path from the ME3 stage.
-(define_insn_reservation "marvell_f_iwmmxt_mult_c2" 3
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_mult_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_wstr" 0
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "type" "wmmx_wstr"))
- "mf_iwmmxt_pipeline")
-
-;; There is a forwarding path from the MW stage.
-(define_insn_reservation "marvell_f_iwmmxt_wldr" 5
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "type" "wmmx_wldr"))
- "mf_iwmmxt_pipeline")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 75c06d9..c683ec2 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -806,14 +806,8 @@
;;-------------------------------------------------------------------------
;;
-;; iWMMXt predicates
-;;
-
-(define_predicate "imm_or_reg_operand"
- (ior (match_operand 0 "immediate_operand")
- (match_operand 0 "register_operand")))
-
;; Neon predicates
+;;
(define_predicate "const_multiple_of_8_operand"
(match_code "const_int")
@@ -907,7 +901,8 @@
(define_predicate "mem_noofs_operand"
(and (match_code "mem")
- (match_code "reg" "0")))
+ (match_code "reg" "0")
+ (match_operand 0 "memory_operand")))
(define_predicate "call_insn_operand"
(ior (and (match_code "symbol_ref")
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 641f8f5..670f574 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -50,11 +50,8 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/fa726te.md \
$(srcdir)/config/arm/fmp626.md \
$(srcdir)/config/arm/iterators.md \
- $(srcdir)/config/arm/iwmmxt.md \
- $(srcdir)/config/arm/iwmmxt2.md \
$(srcdir)/config/arm/ldmstm.md \
$(srcdir)/config/arm/ldrdstrd.md \
- $(srcdir)/config/arm/marvell-f-iwmmxt.md \
$(srcdir)/config/arm/mve.md \
$(srcdir)/config/arm/neon.md \
$(srcdir)/config/arm/predicates.md \
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 172c974..019f9d4 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -235,7 +235,7 @@
(define_insn "*thumb2_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,lk*r,m")
(match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,lk*r"))]
- "TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
+ "TARGET_THUMB2 && !TARGET_HARD_FLOAT
&& ( register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
{
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index b72c871..e517b91 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -184,70 +184,6 @@
; untyped insn without type information - default, and error,
; case.
;
-; The classification below is for instructions used by the Wireless MMX
-; Technology. Each attribute value is used to classify an instruction of the
-; same name or family.
-;
-; wmmx_tandc
-; wmmx_tbcst
-; wmmx_textrc
-; wmmx_textrm
-; wmmx_tinsr
-; wmmx_tmcr
-; wmmx_tmcrr
-; wmmx_tmia
-; wmmx_tmiaph
-; wmmx_tmiaxy
-; wmmx_tmrc
-; wmmx_tmrrc
-; wmmx_tmovmsk
-; wmmx_torc
-; wmmx_torvsc
-; wmmx_wabs
-; wmmx_wdiff
-; wmmx_wacc
-; wmmx_wadd
-; wmmx_waddbhus
-; wmmx_waddsubhx
-; wmmx_waligni
-; wmmx_walignr
-; wmmx_wand
-; wmmx_wandn
-; wmmx_wavg2
-; wmmx_wavg4
-; wmmx_wcmpeq
-; wmmx_wcmpgt
-; wmmx_wmac
-; wmmx_wmadd
-; wmmx_wmax
-; wmmx_wmerge
-; wmmx_wmiawxy
-; wmmx_wmiaxy
-; wmmx_wmin
-; wmmx_wmov
-; wmmx_wmul
-; wmmx_wmulw
-; wmmx_wldr
-; wmmx_wor
-; wmmx_wpack
-; wmmx_wqmiaxy
-; wmmx_wqmulm
-; wmmx_wqmulwm
-; wmmx_wror
-; wmmx_wsad
-; wmmx_wshufh
-; wmmx_wsll
-; wmmx_wsra
-; wmmx_wsrl
-; wmmx_wstr
-; wmmx_wsub
-; wmmx_wsubaddhx
-; wmmx_wunpckeh
-; wmmx_wunpckel
-; wmmx_wunpckih
-; wmmx_wunpckil
-; wmmx_wxor
-;
; The classification below is for NEON instructions. If a new neon type is
; added, please ensure this is added to the is_neon_type attribute below too.
;
@@ -714,65 +650,6 @@
umull,\
umulls,\
untyped,\
- wmmx_tandc,\
- wmmx_tbcst,\
- wmmx_textrc,\
- wmmx_textrm,\
- wmmx_tinsr,\
- wmmx_tmcr,\
- wmmx_tmcrr,\
- wmmx_tmia,\
- wmmx_tmiaph,\
- wmmx_tmiaxy,\
- wmmx_tmrc,\
- wmmx_tmrrc,\
- wmmx_tmovmsk,\
- wmmx_torc,\
- wmmx_torvsc,\
- wmmx_wabs,\
- wmmx_wabsdiff,\
- wmmx_wacc,\
- wmmx_wadd,\
- wmmx_waddbhus,\
- wmmx_waddsubhx,\
- wmmx_waligni,\
- wmmx_walignr,\
- wmmx_wand,\
- wmmx_wandn,\
- wmmx_wavg2,\
- wmmx_wavg4,\
- wmmx_wcmpeq,\
- wmmx_wcmpgt,\
- wmmx_wmac,\
- wmmx_wmadd,\
- wmmx_wmax,\
- wmmx_wmerge,\
- wmmx_wmiawxy,\
- wmmx_wmiaxy,\
- wmmx_wmin,\
- wmmx_wmov,\
- wmmx_wmul,\
- wmmx_wmulw,\
- wmmx_wldr,\
- wmmx_wor,\
- wmmx_wpack,\
- wmmx_wqmiaxy,\
- wmmx_wqmulm,\
- wmmx_wqmulwm,\
- wmmx_wror,\
- wmmx_wsad,\
- wmmx_wshufh,\
- wmmx_wsll,\
- wmmx_wsra,\
- wmmx_wsrl,\
- wmmx_wstr,\
- wmmx_wsub,\
- wmmx_wsubaddhx,\
- wmmx_wunpckeh,\
- wmmx_wunpckel,\
- wmmx_wunpckih,\
- wmmx_wunpckil,\
- wmmx_wxor,\
\
neon_add,\
neon_add_q,\
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a03609d..c1ee972 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -21,7 +21,6 @@
;; UNSPEC Usage:
;; Note: sin and cos are no-longer used.
;; Unspec enumerators for Neon are defined in neon.md.
-;; Unspec enumerators for iwmmxt2 are defined in iwmmxt2.md
(define_c_enum "unspec" [
UNSPEC_PUSH_MULT ; `push multiple' operation:
@@ -42,17 +41,6 @@
; and stack frame generation. Operand 0 is the
; register to "use".
UNSPEC_CHECK_ARCH ; Set CCs to indicate 26-bit or 32-bit mode.
- UNSPEC_WSHUFH ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
- UNSPEC_WACC ; Used by the intrinsic form of the iWMMXt WACC instruction.
- UNSPEC_TMOVMSK ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction.
- UNSPEC_WSAD ; Used by the intrinsic form of the iWMMXt WSAD instruction.
- UNSPEC_WSADZ ; Used by the intrinsic form of the iWMMXt WSADZ instruction.
- UNSPEC_WMACS ; Used by the intrinsic form of the iWMMXt WMACS instruction.
- UNSPEC_WMACU ; Used by the intrinsic form of the iWMMXt WMACU instruction.
- UNSPEC_WMACSZ ; Used by the intrinsic form of the iWMMXt WMACSZ instruction.
- UNSPEC_WMACUZ ; Used by the intrinsic form of the iWMMXt WMACUZ instruction.
- UNSPEC_CLRDI ; Used by the intrinsic form of the iWMMXt CLRDI instruction.
- UNSPEC_WALIGNI ; Used by the intrinsic form of the iWMMXt WALIGN instruction.
UNSPEC_TLS ; A symbol that has been treated properly for TLS usage.
UNSPEC_PIC_LABEL ; A label used for PIC access that does not appear in the
; instruction stream.
@@ -164,18 +152,6 @@
(define_c_enum "unspec" [
- UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction.
- UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction.
- UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction.
- UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction.
- UNSPEC_WQMULWM ; Used by the intrinsic form of the iWMMXt WQMULWM instruction.
- UNSPEC_WQMULM ; Used by the intrinsic form of the iWMMXt WQMULM instruction.
- UNSPEC_WQMIAxyn ; Used by the intrinsic form of the iWMMXt WMIAxyn instruction.
- UNSPEC_WQMIAxy ; Used by the intrinsic form of the iWMMXt WMIAxy instruction.
- UNSPEC_TANDC ; Used by the intrinsic form of the iWMMXt TANDC instruction.
- UNSPEC_TORC ; Used by the intrinsic form of the iWMMXt TORC instruction.
- UNSPEC_TORVSC ; Used by the intrinsic form of the iWMMXt TORVSC instruction.
- UNSPEC_TEXTRC ; Used by the intrinsic form of the iWMMXt TEXTRC instruction.
UNSPEC_GET_FPSCR_NZCVQC ; Represent fetch of FPSCR_nzcvqc content.
])
@@ -205,12 +181,7 @@
; a 64-bit object.
VUNSPEC_POOL_16 ; `pool-entry(16)'. An entry in the constant pool for
; a 128-bit object.
- VUNSPEC_TMRC ; Used by the iWMMXt TMRC instruction.
- VUNSPEC_TMCR ; Used by the iWMMXt TMCR instruction.
VUNSPEC_ALIGN8 ; 8-byte alignment version of VUNSPEC_ALIGN
- VUNSPEC_WCMP_EQ ; Used by the iWMMXt WCMPEQ instructions
- VUNSPEC_WCMP_GTU ; Used by the iWMMXt WCMPGTU instructions
- VUNSPEC_WCMP_GT ; Used by the iwMMXT WCMPGT instructions
VUNSPEC_EH_RETURN ; Use to override the return address for exception
; handling.
VUNSPEC_ATOMIC_CAS ; Represent an atomic compare swap.
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index a485d05..061165e 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -1,4 +1,4 @@
-;; Machine Description for shared bits common to IWMMXT and Neon.
+;; Machine Description for shared bits common to Neon and MVE.
;; Copyright (C) 2006-2025 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
@@ -24,7 +24,6 @@
[(set (match_operand:VNIM1 0 "nonimmediate_operand")
(match_operand:VNIM1 1 "general_operand"))]
"TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
@@ -46,8 +45,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VNINOTM1 0 "nonimmediate_operand")
(match_operand:VNINOTM1 1 "general_operand"))]
- "TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+ "TARGET_NEON"
{
gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
@@ -83,7 +81,7 @@
})
;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon, IWMMXT and MVE.
+;; patterns separately for Neon and MVE.
(define_expand "add<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand")
@@ -103,10 +101,7 @@
[(set (match_operand:VDQWH 0 "s_register_operand")
(mult:VDQWH (match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH
- && (!TARGET_REALLY_IWMMXT
- || <MODE>mode == V4HImode
- || <MODE>mode == V2SImode)"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "smin<mode>3"
@@ -216,13 +211,13 @@
(define_expand "one_cmpl<mode>2"
[(set (match_operand:VDQ 0 "s_register_operand")
(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "<absneg_str><mode>2"
[(set (match_operand:VDQWH 0 "s_register_operand" "")
(ABSNEG:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "cadd<rot><mode>3"
@@ -295,8 +290,7 @@
[(set (match_operand:VDQ 0 "nonimmediate_operand")
(unspec:VDQ [(match_operand:VDQ 1 "general_operand")]
UNSPEC_MISALIGNED_ACCESS))]
- "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN
- && unaligned_access && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
{
rtx *memloc;
bool for_store = false;
@@ -373,7 +367,7 @@
(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w,w")
(match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Ds")]
VSHLQ))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
"@
<mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
* return neon_output_shift_immediate (\"vshl\", 'i', &operands[2], <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
@@ -385,7 +379,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand" "")
(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
(match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
emit_insn (gen_mve_vshlq_u<mode> (operands[0], operands[1], operands[2]));
DONE;
@@ -398,7 +392,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand")
(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
if (s_register_operand (operands[2], <MODE>mode))
{
@@ -416,7 +410,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand")
(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
if (s_register_operand (operands[2], <MODE>mode))
{
@@ -606,8 +600,7 @@
(define_expand "clz<mode>2"
[(set (match_operand:VDQIW 0 "s_register_operand")
(clz:VDQIW (match_operand:VDQIW 1 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH
- && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "vec_init<mode><V_elem_l>"
[(match_operand:VDQX 0 "s_register_operand")
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 2c21e7b..284f49d 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -3167,8 +3167,7 @@ bbinfo_t::optimize_one_block (bool &changed)
|| (bbinfo_t::try_split_any_p && od.try_split_any (this))
|| (bbinfo_t::try_mem0_p && od.try_mem0 (this)));
- rtx_insn *new_insns = get_insns ();
- end_sequence ();
+ rtx_insn *new_insns = end_sequence ();
gcc_assert (found == (od.n_new_insns >= 0));
@@ -3943,10 +3942,7 @@ avr_parallel_insn_from_insns (rtx_insn *i[5])
PATTERN (i[3]), PATTERN (i[4]));
start_sequence ();
emit (gen_rtx_PARALLEL (VOIDmode, vec));
- rtx_insn *insn = get_insns ();
- end_sequence ();
-
- return insn;
+ return end_sequence ();
}
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b192a12..f651692 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1660,8 +1660,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
-size_cfa)));
}
- fp_plus_insns = get_insns ();
- end_sequence ();
+ fp_plus_insns = end_sequence ();
/************ Method 2: Adjust Stack pointer ************/
@@ -1693,8 +1692,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
RTX_FRAME_RELATED_P (insn) = 1;
}
- sp_plus_insns = get_insns ();
- end_sequence ();
+ sp_plus_insns = end_sequence ();
/************ Use shortest method ************/
@@ -2060,8 +2058,7 @@ avr_expand_epilogue (bool sibcall_p)
emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp,
GEN_INT (irq_state)));
- rtx_insn *fp_plus_insns = get_insns ();
- end_sequence ();
+ rtx_insn *fp_plus_insns = end_sequence ();
/********** Method 2: Adjust Stack pointer **********/
@@ -2072,8 +2069,7 @@ avr_expand_epilogue (bool sibcall_p)
emit_move_insn (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, size));
- rtx_insn *sp_plus_insns = get_insns ();
- end_sequence ();
+ rtx_insn *sp_plus_insns = end_sequence ();
/************ Use shortest method ************/
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 1c4e44d..f8bbdc7 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -718,6 +718,8 @@
"&& reload_completed"
[(parallel [(set (reg:MOVMODE REG_22)
(match_dup 0))
+ (clobber (reg:QI REG_21))
+ (clobber (reg:HI REG_Z))
(clobber (reg:CC REG_CC))])]
{
operands[0] = SET_SRC (single_set (curr_insn));
@@ -727,6 +729,8 @@
[(set (reg:MOVMODE REG_22)
(mem:MOVMODE (lo_sum:PSI (reg:QI REG_21)
(reg:HI REG_Z))))
+ (clobber (reg:QI REG_21))
+ (clobber (reg:HI REG_Z))
(clobber (reg:CC REG_CC))]
"reload_completed
&& (avr_load_libgcc_insn_p (insn, ADDR_SPACE_MEMX, true)
@@ -5269,6 +5273,41 @@
;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
;; arithmetic shift left
+;; Work around PR120423: Transform left shift of a paradoxical subreg
+;; into left shift of the zero-extended entity.
+(define_split ; PR120423
+ [(set (match_operand:HISI 0 "register_operand")
+ (ashift:HISI (subreg:HISI (match_operand:QIPSI 1 "nonimmediate_operand")
+ 0)
+ (match_operand:QI 2 "const_int_operand")))]
+ "!reload_completed
+ && !avropt_lra_p
+ && <HISI:SIZE> > <QIPSI:SIZE>"
+ [(set (match_dup 4)
+ (zero_extend:HISI (match_dup 5)))
+ (set (match_dup 0)
+ (ashift:HISI (match_dup 4)
+ (match_dup 2)))]
+ {
+ operands[4] = gen_reg_rtx (<HISI:MODE>mode);
+ operands[5] = force_reg (<QIPSI:MODE>mode, operands[1]);
+ })
+
+;; Similar happens for PR116389.
+(define_split ; PR116389
+ [(set (match_operand:HISI 0 "register_operand")
+ (subreg:HISI (match_operand:QIPSI 1 "nonimmediate_operand")
+ 0))]
+ "!reload_completed
+ && !avropt_lra_p
+ && <HISI:SIZE> > <QIPSI:SIZE>"
+ [(set (match_dup 0)
+ (zero_extend:HISI (match_dup 2)))]
+ {
+ operands[2] = force_reg (<QIPSI:MODE>mode, operands[1]);
+ })
+
+
;; "ashlqi3"
;; "ashlqq3" "ashluqq3"
(define_expand "ashl<mode>3"
diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc
index 6de22a4..2cf4e77 100644
--- a/gcc/config/bfin/bfin.cc
+++ b/gcc/config/bfin/bfin.cc
@@ -3784,8 +3784,7 @@ hwloop_optimize (hwloop_info loop)
}
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (loop->incoming_src)
{
diff --git a/gcc/config/c6x/c6x.cc b/gcc/config/c6x/c6x.cc
index eebff17..695a97e 100644
--- a/gcc/config/c6x/c6x.cc
+++ b/gcc/config/c6x/c6x.cc
@@ -1582,8 +1582,7 @@ c6x_expand_compare (rtx comparison, machine_mode mode)
cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
op0, op_mode, op1, op_mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, SImode, op0, op1));
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index e7da250..50bad27 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -444,11 +444,9 @@ struct GTY(()) machine_function
#define TARG_VEC_PERMUTE_COST 1
#endif
-/* ttype entries (the only interesting data references used) are
- sb-relative got-indirect (aka .ehtype). */
+/* .ehtype ttype entries are sb-relative. */
#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \
- (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \
- : DW_EH_PE_absptr)
+ (((code) == 0 && (data) == 1) ? DW_EH_PE_datarel : DW_EH_PE_absptr)
/* This should be the same as the definition in elfos.h, plus the call
to output special unwinding directives. */
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 42d616a..a34c9e9 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2692,8 +2692,7 @@ cris_split_movdx (rtx *operands)
else
internal_error ("unknown dest");
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 0c90c0e..8fe79f5 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -539,8 +539,7 @@
operand_subword (op1, 0, 1, DImode));
emit_move_insn (operand_subword (op0, 1, 1, DImode),
operand_subword (op1, 1, 1, DImode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
diff --git a/gcc/config/csky/csky.cc b/gcc/config/csky/csky.cc
index 16db497..9888af1 100644
--- a/gcc/config/csky/csky.cc
+++ b/gcc/config/csky/csky.cc
@@ -2899,8 +2899,7 @@ csky_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
*valuep = emit_library_call_value (get_tls_get_addr (),
NULL_RTX, LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 8c164fd..9b9a3fe 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -504,6 +504,7 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\
%{static|static-libgcc|static-libquadmath:%:replace-outfile(-lquadmath libquadmath.a%s)}\
%{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\
+ %{static|static-libgcc|static-libgcobol:%:replace-outfile(-lgcobol libgcobol.a%s)}\
%{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\
%{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\
diff --git a/gcc/config/epiphany/resolve-sw-modes.cc b/gcc/config/epiphany/resolve-sw-modes.cc
index 8ead531..1206839 100644
--- a/gcc/config/epiphany/resolve-sw-modes.cc
+++ b/gcc/config/epiphany/resolve-sw-modes.cc
@@ -169,8 +169,7 @@ pass_resolve_sw_modes::execute (function *fun)
emit_set_fp_mode (EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN,
jilted_mode, FP_MODE_NONE,
reg_class_contents[NO_REGS]);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
need_commit = true;
insert_insn_on_edge (seq, e);
}
diff --git a/gcc/config/fr30/fr30.cc b/gcc/config/fr30/fr30.cc
index b956a4c..8dd7961 100644
--- a/gcc/config/fr30/fr30.cc
+++ b/gcc/config/fr30/fr30.cc
@@ -976,8 +976,7 @@ fr30_move_double (rtx * operands)
/* This should have been prevented by the constraints on movdi_insn. */
gcc_unreachable ();
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
diff --git a/gcc/config/frv/frv.cc b/gcc/config/frv/frv.cc
index e53a0a0..e52bd59 100644
--- a/gcc/config/frv/frv.cc
+++ b/gcc/config/frv/frv.cc
@@ -4759,8 +4759,7 @@ frv_split_scc (rtx dest, rtx test, rtx cc_reg, rtx cr_reg, HOST_WIDE_INT value)
gen_rtx_SET (dest, const0_rtx)));
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -4931,8 +4930,7 @@ frv_split_cond_move (rtx operands[])
}
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -5062,8 +5060,7 @@ frv_split_minmax (rtx operands[])
}
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -5101,8 +5098,7 @@ frv_split_abs (rtx operands[])
gen_rtx_SET (dest, src)));
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
diff --git a/gcc/config/frv/frv.md b/gcc/config/frv/frv.md
index 1d8b8ae..8ecc633 100644
--- a/gcc/config/frv/frv.md
+++ b/gcc/config/frv/frv.md
@@ -2009,8 +2009,7 @@
gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
gen_rtx_SET (dest, const0_rtx)));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
;; Reload CC_UNSmode for unsigned integer comparisons
@@ -2074,8 +2073,7 @@
gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
gen_rtx_SET (dest, const0_rtx)));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
;; Reload CC_NZmode. This is mostly the same as the CCmode and CC_UNSmode
@@ -2245,8 +2243,7 @@
emit_insn (gen_andsi3 (int_op0, int_op0, GEN_INT (CC_MASK)));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
;; Move a gpr value to FCC.
@@ -2329,8 +2326,7 @@
const0_rtx),
gen_rtx_SET (int_op0, const0_rtx)));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
(define_split
@@ -2357,8 +2353,7 @@
if (! ICR_P (REGNO (operands[0])))
emit_insn (gen_movcc_ccr (operands[0], icr));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
index af14203..426acf0 100644
--- a/gcc/config/gcn/gcn-devices.def
+++ b/gcc/config/gcn/gcn-devices.def
@@ -171,6 +171,28 @@ GCN_DEVICE(gfx90c, GFX90C, 0x32, ISA_GCN5,
/* Generic Name */ GFX9_GENERIC
)
+GCN_DEVICE(gfx942, GFX942, 0x4c, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
+GCN_DEVICE(gfx950, GFX950, 0x4f, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
GCN_DEVICE(gfx9-generic, GFX9_GENERIC, 0x051, ISA_GCN5,
/* XNACK default */ HSACO_ATTR_ANY,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
@@ -182,6 +204,17 @@ GCN_DEVICE(gfx9-generic, GFX9_GENERIC, 0x051, ISA_GCN5,
/* Generic Name */ NONE
)
+GCN_DEVICE(gfx9-4-generic, GFX9_4_GENERIC, 0x05f, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
/* GCN GFX10.3 (RDNA 2) */
GCN_DEVICE(gfx1030, GFX1030, 0x36, ISA_RDNA2,
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 88f562d..bcea14f 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -33,7 +33,8 @@ extern enum gcn_isa {
ISA_RDNA2,
ISA_RDNA3,
ISA_CDNA1,
- ISA_CDNA2
+ ISA_CDNA2,
+ ISA_CDNA3
} gcn_isa;
#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
@@ -41,6 +42,8 @@ extern enum gcn_isa {
#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+#define TARGET_CDNA3 (gcn_isa == ISA_CDNA3)
+#define TARGET_CDNA3_PLUS (gcn_isa >= ISA_CDNA3)
#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2)
#define TARGET_RDNA2_PLUS (gcn_isa >= ISA_RDNA2 && gcn_isa < ISA_CDNA1)
#define TARGET_RDNA3 (gcn_isa == ISA_RDNA3)
@@ -81,18 +84,22 @@ enum hsaco_attr_type
#define TARGET_DPP8 TARGET_RDNA2_PLUS
/* Device requires CDNA1-style manually inserted wait states for AVGPRs. */
#define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1
+/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0, sc1) flag
+ for scalar memory operations. The string starts on purpose with a space. */
+#define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc")
/* The metadata on different devices need different granularity. */
#define TARGET_VGPR_GRANULARITY \
(TARGET_RDNA3 ? 12 \
: TARGET_RDNA2_PLUS || TARGET_CDNA2_PLUS ? 8 \
: 4)
/* This mostly affects the metadata. */
-#define TARGET_ARCHITECTED_FLAT_SCRATCH TARGET_RDNA3
+#define TARGET_ARCHITECTED_FLAT_SCRATCH (TARGET_RDNA3 || TARGET_CDNA3)
/* Device has Sub-DWord Addressing instructions. */
#define TARGET_SDWA (!TARGET_RDNA3)
/* Different devices use different cache control instructions. */
-#define TARGET_WBINVL1_CACHE (!TARGET_RDNA2_PLUS)
+#define TARGET_WBINVL1_CACHE (!TARGET_RDNA2_PLUS && !TARGET_CDNA3)
#define TARGET_GLn_CACHE TARGET_RDNA2_PLUS
+#define TARGET_TARGET_SC_CACHE TARGET_CDNA3
/* Some devices have TGSPLIT, which needs at least metadata. */
#define TARGET_TGSPLIT TARGET_CDNA2_PLUS
diff --git a/gcc/config/gcn/gcn-tables.opt b/gcc/config/gcn/gcn-tables.opt
index 96ce9bd..4a381b3 100644
--- a/gcc/config/gcn/gcn-tables.opt
+++ b/gcc/config/gcn/gcn-tables.opt
@@ -49,9 +49,18 @@ EnumValue
Enum(gpu_type) String(gfx90c) Value(PROCESSOR_GFX90C)
EnumValue
+Enum(gpu_type) String(gfx942) Value(PROCESSOR_GFX942)
+
+EnumValue
+Enum(gpu_type) String(gfx950) Value(PROCESSOR_GFX950)
+
+EnumValue
Enum(gpu_type) String(gfx9-generic) Value(PROCESSOR_GFX9_GENERIC)
EnumValue
+Enum(gpu_type) String(gfx9-4-generic) Value(PROCESSOR_GFX9_4_GENERIC)
+
+EnumValue
Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
EnumValue
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 977ad88..4b21302 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1161,7 +1161,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[3]);
- const char *glc = INTVAL (operands[4]) ? " glc" : "";
+ const char *glc = INTVAL (operands[4]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_FLAT_P (as))
@@ -1221,7 +1221,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[4]);
- const char *glc = INTVAL (operands[5]) ? " glc" : "";
+ const char *glc = INTVAL (operands[5]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_GLOBAL_P (as))
@@ -1288,7 +1288,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[3]);
- const char *glc = INTVAL (operands[4]) ? " glc" : "";
+ const char *glc = INTVAL (operands[4]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_FLAT_P (as))
@@ -1345,7 +1345,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[4]);
- const char *glc = INTVAL (operands[5]) ? " glc" : "";
+ const char *glc = INTVAL (operands[5]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_GLOBAL_P (as))
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index d59e87b..2d8dfa3 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -585,9 +585,8 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
case XNACK_MASK_HI_REG:
case TBA_HI_REG:
case TMA_HI_REG:
- return mode == SImode;
case VCC_HI_REG:
- return false;
+ return mode == SImode;
case EXEC_HI_REG:
return mode == SImode /*|| mode == V32BImode */ ;
case SCC_REG:
@@ -3096,8 +3095,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
saved_scalars++;
}
- rtx move_scalars = get_insns ();
- end_sequence ();
+ rtx move_scalars = end_sequence ();
start_sequence ();
/* Ensure that all vector lanes are moved. */
@@ -3232,8 +3230,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
offset += size;
}
- rtx move_vectors = get_insns ();
- end_sequence ();
+ rtx move_vectors = end_sequence ();
if (prologue)
{
@@ -3360,8 +3357,7 @@ gcn_expand_prologue ()
+ offsets->callee_saves))));
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn (seq);
}
@@ -5860,8 +5856,7 @@ gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
{
start_sequence ();
emit_move_insn (exec_save_reg, exec_reg);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_after (seq, last_exec_def);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -5877,8 +5872,7 @@ gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
/* Restore EXEC register before the usage. */
start_sequence ();
emit_move_insn (exec_reg, exec);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -6039,8 +6033,7 @@ gcn_md_reorg (void)
{
start_sequence ();
emit_move_insn (exec_reg, GEN_INT (new_exec));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -6587,8 +6580,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
if (avgpr % vgpr_block_size)
avgpr += vgpr_block_size - (avgpr % vgpr_block_size);
- fputs ("\t.rodata\n"
- "\t.p2align\t6\n"
+ switch_to_section (readonly_data_section);
+ fputs ("\t.p2align\t6\n"
"\t.amdhsa_kernel\t", file);
assemble_name (file, name);
fputs ("\n", file);
@@ -6707,7 +6700,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
fputs (" .end_amdgpu_metadata\n", file);
#endif
- fputs ("\t.text\n", file);
+ switch_to_section (current_function_section ());
fputs ("\t.align\t256\n", file);
fputs ("\t.type\t", file);
assemble_name (file, name);
@@ -7108,7 +7101,8 @@ print_operand_address (FILE *file, rtx mem)
E - print conditional code for v_cmp (eq_u64/ne_u64...)
A - print address in formatting suitable for given address space.
O - print offset:n for data share operations.
- g - print "glc", if appropriate for given MEM
+ G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum]
+ g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM
L - print low-part of a multi-reg value
H - print second part of a multi-reg value (high-part of 2-reg value)
J - print third part of a multi-reg value
@@ -7724,10 +7718,13 @@ print_operand (FILE *file, rtx x, int code)
else
output_addr_const (file, x);
return;
+ case 'G':
+ fputs (TARGET_GLC_NAME, file);
+ return;
case 'g':
gcc_assert (xcode == MEM);
if (MEM_VOLATILE_P (x))
- fputs (" glc", file);
+ fputs (TARGET_GLC_NAME, file);
return;
default:
output_operand_lossage ("invalid %%xn code");
@@ -7908,8 +7905,6 @@ gcn_dwarf_register_span (rtx rtl)
#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
#undef TARGET_MEMORY_MOVE_COST
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 5198fbc..3d42de3 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -43,6 +43,8 @@ extern const struct gcn_device_def {
builtin_define ("__CDNA1__"); \
else if (TARGET_CDNA2) \
builtin_define ("__CDNA2__"); \
+ else if (TARGET_CDNA3) \
+ builtin_define ("__CDNA3__"); \
else if (TARGET_RDNA2) \
builtin_define ("__RDNA2__"); \
else if (TARGET_RDNA3) \
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 695656f..1998931 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -206,7 +206,7 @@
; vdata: vgpr0-255
; srsrc: sgpr0-102
; soffset: sgpr0-102
-; flags: offen, idxen, glc, lds, slc, tfe
+; flags: offen, idxen, %G, lds, slc, tfe
;
; mtbuf - Typed memory buffer operation. Two words
; offset: 12-bit constant
@@ -216,10 +216,10 @@
; vdata: vgpr0-255
; srsrc: sgpr0-102
; soffset: sgpr0-102
-; flags: offen, idxen, glc, lds, slc, tfe
+; flags: offen, idxen, %G, lds, slc, tfe
;
; flat - flat or global memory operations
-; flags: glc, slc
+; flags: %G, slc
; addr: vgpr0-255
; data: vgpr0-255
; vdst: vgpr0-255
@@ -1018,7 +1018,9 @@
[(const_int 0)]
""
{
- sorry ("exception handling not supported");
+ if (!fake_exceptions)
+ sorry ("exception handling not supported");
+ DONE;
})
;; }}}
@@ -1962,6 +1964,14 @@
[(set_attr "type" "mult")
(set_attr "length" "8")])
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_TARGET_SC_CACHE"
+ "buffer_inv sc1"
+ [(set_attr "type" "mubuf")
+ (set_attr "length" "4")])
+
; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
@@ -1977,9 +1987,9 @@
(use (match_operand 3 "const_int_operand"))]
"0 /* Disabled. */"
"@
- s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
- flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
- global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
+ s_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+ flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0
+ global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "12")])
@@ -2044,9 +2054,9 @@
UNSPECV_ATOMIC))]
""
"@
- s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
- flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
- global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
+ s_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\tlgkmcnt(0)
+ flat_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\t0
+ global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "12")
(set_attr "delayeduse" "*,yes,yes")])
@@ -2086,15 +2096,15 @@
switch (which_alternative)
{
case 0:
- return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
+ return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)";
case 1:
return (TARGET_RDNA2 /* Not GFX11. */
- ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0"
- : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0");
+ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0"
+ : "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0");
case 2:
return (TARGET_RDNA2 /* Not GFX11. */
- ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)"
- : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)");
+ ? "global_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\tvmcnt(0)"
+ : "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)");
}
break;
case MEMMODEL_CONSUME:
@@ -2103,25 +2113,31 @@
switch (which_alternative)
{
case 0:
- return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
+ return "s_load%o0\t%0, %A1 %G1\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol";
case 1:
return (TARGET_RDNA2
- ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0\;"
+ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ ? "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
- : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
+ "buffer_inv sc1"
+ : "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol");
case 2:
return (TARGET_RDNA2
- ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)\;"
+ ? "global_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\tvmcnt(0)\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ ? "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
"buffer_gl1_inv\;buffer_gl0_inv"
- : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
+ "buffer_inv sc1"
+ : "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
"buffer_wbinvl1_vol");
}
break;
@@ -2131,25 +2147,31 @@
switch (which_alternative)
{
case 0:
- return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
+ return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 %G1\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_RDNA2
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc dlc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1 dlc\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
- : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;flat_load%o0\t%0, %A1%O1 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
+ : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
return (TARGET_RDNA2
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc dlc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1 dlc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
- : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;global_load%o0\t%0, %A1%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
+ : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
@@ -2174,11 +2196,11 @@
switch (which_alternative)
{
case 0:
- return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
+ return "s_store%o1\t%1, %A0 %G1\;s_waitcnt\tlgkmcnt(0)";
case 1:
- return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
+ return "flat_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\t0";
case 2:
- return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
+ return "global_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\tvmcnt(0)";
}
break;
case MEMMODEL_RELEASE:
@@ -2186,18 +2208,22 @@
switch (which_alternative)
{
case 0:
- return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
+ return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1"
: "error: cache architectire unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 %G1"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1"
: "error: cache architecture unspecified");
}
break;
@@ -2207,23 +2233,29 @@
switch (which_alternative)
{
case 0:
- return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
+ return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 %G1\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
@@ -2250,11 +2282,11 @@
switch (which_alternative)
{
case 0:
- return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
+ return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)";
case 1:
- return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
+ return "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0";
case 2:
- return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ return "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)";
}
break;
@@ -2264,23 +2296,29 @@
switch (which_alternative)
{
case 0:
- return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
+ return "s_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
+ "buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
@@ -2289,24 +2327,31 @@
switch (which_alternative)
{
case 0:
- return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
+ return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\tlgkmcnt(0)";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ "s_waitcnt\t0"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
? "buffer_gl1_inv\;buffer_gl0_inv\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: "error: cache architecture unspecified");
}
@@ -2317,25 +2362,32 @@
switch (which_alternative)
{
case 0:
- return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
+ return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
? "buffer_gl1_inv\;buffer_gl0_inv\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_inv sc1\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 142b439..99d6aeb 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -101,3 +101,11 @@ Enum(gcn_preferred_vectorization_factor) String(32) Value(32)
EnumValue
Enum(gcn_preferred_vectorization_factor) String(64) Value(64)
+
+mfake-exceptions
+Target Var(fake_exceptions) Init(0) Undocumented
; With '-mfake-exceptions' enabled, the user-visible behavior in the presence of
+; exception handling constructs changes such that the compile-time
+; 'sorry, unimplemented: exception handling not supported' is skipped, code
+; generation proceeds, and instead, exception handling constructs 'abort' at
+; run time. (..., or don't, if they're in dead code.)
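As a hypothetical illustration of the behavior described above (not taken from the patch): with '-mfake-exceptions', offload code along the following lines is no longer rejected with the compile-time 'sorry'; the throw aborts at run time, and only if it is actually reached.

/* Hypothetical example only: an exception handling construct in offloaded code.  */
int
checked_divide (int a, int b)
{
  if (b == 0)
    throw 0;   /* With -mfake-exceptions this compiles; aborts at run time if reached.  */
  return a / b;
}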
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index f5b89c9..b284ff4 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -1160,6 +1160,9 @@ main (int argc, char **argv)
obstack_ptr_grow (&cc_argv_obstack, "-xlto");
if (fopenmp)
obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
+ /* The host code may contain exception handling constructs.
Handle these as well as we can. */
+ obstack_ptr_grow (&cc_argv_obstack, "-mfake-exceptions");
for (int ix = 1; ix != argc; ix++)
{
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h
deleted file mode 100644
index 21e4b36..0000000
--- a/gcc/config/i386/avx10_2-512bf16intrin.h
+++ /dev/null
@@ -1,681 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED
-#define _AVX10_2_512BF16INTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-/* Intrinsics vrndscalebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_roundscale_pbh(A, B) \
- (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_roundscale_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_roundscale_pbh(A, B, C) \
- (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vreducebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_reduce_pbh(A, B) \
- (__builtin_ia32_reducebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_reduce_pbh(A, B, C, D) \
- (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_reduce_pbh(A, B, C) \
- (__builtin_ia32_reducebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vgetmantbf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
-
-#define _mm512_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfpclassbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
-
-#define _mm512_fpclass_pbh_mask(X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
-#endif /* __OPTIMIZE__ */
-
-
-/* Intrinsics vcmpbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
-
-#define _mm512_cmp_pbh_mask(A, B, C) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h
deleted file mode 100644
index 611a40d..0000000
--- a/gcc/config/i386/avx10_2-512convertintrin.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead."
-#endif // _IMMINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2__
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- __R);
-}
-
-#else
-#define _mm512_cvtx_round2ps_ph(A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (-1), \
- (R)))
-#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) (W), \
- (__mmask32) (U), \
- (R)))
-#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (U), \
- (R)))
-#endif /* __OPTIMIZE__ */
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
- __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvthf8_ph (__m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_undefined_ph (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_setzero_ph (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbf8_ph (__m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
- (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
-}
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h
deleted file mode 100644
index 43271e7..0000000
--- a/gcc/config/i386/avx10_2-512mediaintrin.h
+++ /dev/null
@@ -1,514 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-
-#if !defined(__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
- (__v64qi) __Y,
- __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) __W,
- __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) _mm512_setzero_epi32 (),
- __U);
-}
-#else
-#define _mm512_mpsadbw_epu8(X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), (int)(M))
-
-#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi)(__m512i)(W), \
- (__mmask32)(U))
-
-#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi) _mm512_setzero_epi32 (), \
- (__mmask32)(U))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512MEDIAINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h
deleted file mode 100644
index a743346..0000000
--- a/gcc/config/i386/avx10_2-512minmaxintrin.h
+++ /dev/null
@@ -1,489 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-#ifdef __OPTIMIZE__
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
- const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U, __R);
-}
-
-#else
-#define _mm512_minmax_pbh(A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (-1)))
-
-#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) (W), \
- (__mmask32) (U)))
-
-#define _mm512_maskz_minmax_pbh(U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (U)))
-
-#define _mm512_minmax_round_pd(A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ph(A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ps(A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_minmax_pd(A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_pd(W, U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_pd(U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ph(A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ph(W, U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ph(U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ps(A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ps(W, U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ps(U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h
deleted file mode 100644
index 215b7fd..0000000
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ /dev/null
@@ -1,1575 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epu8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epi8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epu8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epi8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epu8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epi8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epu8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epi8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epu8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-#else
-#define _mm512_ipcvts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */
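Likewise, a minimal sketch of how the truncating, saturating conversions from the header deleted above were invoked (illustrative only; the entry points are those defined in the removed file, values are hypothetical):

/* Illustrative sketch: float64 -> int32 truncating saturating
   conversion with the three masking forms.  */
#include <immintrin.h>

__m256i
cvtts_demo (__m512d a, __m256i src, __mmask8 k)
{
  __m256i all   = _mm512_cvtts_pd_epi32 (a);              /* every lane          */
  __m256i merge = _mm512_mask_cvtts_pd_epi32 (src, k, a); /* keep SRC where k = 0 */
  __m256i zero  = _mm512_maskz_cvtts_pd_epi32 (k, a);     /* zero where k = 0     */
  return _mm256_or_si256 (_mm256_or_si256 (all, merge), zero);
}

The _round variants add a const int rounding/SAE argument in the same positions, as shown by the macro forms above.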
diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h
index e6890fc..9560480 100644
--- a/gcc/config/i386/avx10_2bf16intrin.h
+++ b/gcc/config/i386/avx10_2bf16intrin.h
@@ -34,6 +34,32 @@
#define __DISABLE_AVX10_2__
#endif /* __AVX10_2__ */
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pbh (__m256bh __A, __m256bh __B)
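The new 512-bit BF16 arithmetic entry points added in this hunk follow the same unmasked / merge-masked / zero-masked pattern as their 256- and 128-bit counterparts; a minimal usage sketch (illustrative only, assuming an AVX10.2 target):

#include <immintrin.h>

__m512bh
bf16_add_demo (__m512bh a, __m512bh b, __m512bh src, __mmask32 k)
{
  __m512bh sum    = _mm512_add_pbh (a, b);               /* all 32 lanes         */
  __m512bh merged = _mm512_mask_add_pbh (src, k, a, b);  /* keep SRC where k = 0 */
  return _mm512_maskz_add_pbh (k, sum, merged);          /* zero where k = 0     */
}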
@@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pbh (__m256bh __A, __m256bh __B)
@@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pbh (__m256bh __A, __m256bh __B)
@@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pbh (__m256bh __A, __m256bh __B)
@@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pbh (__m256bh __A, __m256bh __B)
@@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pbh (__m256bh __A, __m256bh __B)
@@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_pbh (__m256bh __A, __m256bh __B)
@@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_pbh (__m256bh __A)
@@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pbh (__m256bh __A)
@@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_pbh (__m256bh __A)
@@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_pbh (__m256bh __A)
@@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A)
/* Intrinsics vrndscalebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_pbh (__m256bh __A, int B)
@@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_roundscale_pbh(A, B) \
(__builtin_ia32_rndscalebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vreducebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pbh (__m256bh __A, int B)
@@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_reduce_pbh(A, B) \
(__builtin_ia32_reducebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vgetmantbf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B,
@@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
}
#else
+#define _mm512_getmant_pbh(A, B, C) \
+ (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
+ (__builtin_ia32_getmantbf16512_mask ((C), (int)(((E)<<2) | (D)), (A), (B)))
+
+#define _mm512_maskz_getmant_pbh(A, B, C, D) \
+ (__builtin_ia32_getmantbf16512_mask ((B), (int)(((D)<<2) | (C)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_getmant_pbh(A, B, C) \
(__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
/* Intrinsics vfpclassbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm,
+ (__mmask32) -1);
+}
+
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
}
#else
+#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
+
+#define _mm512_fpclass_pbh_mask(X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
+
#define _mm256_mask_fpclass_pbh_mask(U, A, B) \
((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U)))
@@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
/* Intrinsics vcmpbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
+ (__mmask32) -1);
+}
+
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm)
}
#else
+#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
+
+#define _mm512_cmp_pbh_mask(A, B, C) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
+
#define _mm256_mask_cmp_pbh_mask(A, B, C, D) \
((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A)))
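As a usage sketch (illustrative only, not part of the patch): the new 512-bit BF16 forms compose in the usual compare-to-mask / masked-operation pattern. The function name and the -mavx10.2 option below are assumptions for the example.

#include <immintrin.h>

/* Square-root the non-NaN lanes of a 32-element BF16 vector and zero the
   rest: an ordered self-compare yields a mask of the non-NaN lanes, which
   then drives the zero-masking sqrt form.  Build with e.g. -mavx10.2.  */
__m512bh
sqrt_skip_nan (__m512bh x)
{
  __mmask32 ok = _mm512_cmp_pbh_mask (x, x, _CMP_ORD_Q);
  return _mm512_maskz_sqrt_pbh (ok, x);
}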
diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h
index 8cbdc66..f2fb98f 100644
--- a/gcc/config/i386/avx10_2convertintrin.h
+++ b/gcc/config/i386/avx10_2convertintrin.h
@@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ __R);
+}
+
+#else
+#define _mm512_cvtx_round2ps_ph(A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (-1), \
+ (R)))
+#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) (W), \
+ (__mmask32) (U), \
+ (R)))
+#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (U), \
+ (R)))
+#endif /* __OPTIMIZE__ */
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_bf8 (__m128i __A, __m128h __B)
@@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_bf8 (__m128i __A, __m128h __B)
@@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_hf8 (__m128i __A, __m128h __B)
@@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
+ __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_hf8 (__m128i __A, __m128h __B)
@@ -350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_bf8 (__m128h __A, __m128h __B)
@@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_bf8 (__m128h __A, __m128h __B)
@@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_hf8 (__m128h __A, __m128h __B)
@@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_hf8 (__m128h __A, __m128h __B)
@@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvthf8_ph (__m128i __A)
@@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A)
(__mmask16) __U);
}
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvthf8_ph (__m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_undefined_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_setzero_ph (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_bf8 (__m128h __A)
@@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_bf8 (__m128h __A)
@@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_hf8 (__m128h __A)
@@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_hf8 (__m128h __A)
@@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbf8_ph (__m128i __A)
@@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A)
(__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8));
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbf8_ph (__m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
+ (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
+}
+
#ifdef __DISABLE_AVX10_2__
#undef __DISABLE_AVX10_2__
#pragma GCC pop_options
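As a usage sketch (illustrative only, not part of the patch) of the new 512-bit FP8 conversions above; the function name and the -mavx10.2 option are assumptions.

#include <immintrin.h>

/* Narrow 32 FP16 values to the 8-bit BF8 (E5M2) storage format and widen
   them back.  Only the narrowing step rounds; the widening step is exact,
   which is why _mm512_cvtbf8_ph above is implemented as a byte shift.  */
__m512h
bf8_roundtrip (__m512h x)
{
  __m256i packed = _mm512_cvtph_bf8 (x);   /* 32 x FP16 -> 32 x BF8.  */
  return _mm512_cvtbf8_ph (packed);        /* 32 x BF8  -> 32 x FP16. */
}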
diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h
index 0993e8e..7d30502 100644
--- a/gcc/config/i386/avx10_2mediaintrin.h
+++ b/gcc/config/i386/avx10_2mediaintrin.h
@@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U,
@@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B)
@@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (),
__U);
}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) __W,
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) _mm512_setzero_epi32 (),
+ __U);
+}
#else
#define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \
(__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \
@@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (), \
(__mmask16)(U))
+#define _mm512_mpsadbw_epu8(X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(M))
+
+#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U))
+
+#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi) _mm512_setzero_epi32 (), \
+ (__mmask32)(U))
#endif
#ifdef __DISABLE_AVX10_2__
diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h
index 0a4a253..f9fe14e 100644
--- a/gcc/config/i386/avx10_2minmaxintrin.h
+++ b/gcc/config/i386/avx10_2minmaxintrin.h
@@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A,
(__mmask16) __U);
}
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_pd (__m128d __A, __m128d __B, const int __C)
@@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ph (__m128h __A, __m128h __B, const int __C)
@@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ps (__m128 __A, __m128 __B, const int __C)
@@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_sd (__m128d __A, __m128d __B, const int __C)
@@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_si256 (), \
(__mmask16) (U)))
+#define _mm512_minmax_pbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_pbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
+
#define _mm_minmax_pd(A, B, C) \
((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
(__v2df) (B), \
@@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_pd (), \
(__mmask8) (U)))
+#define _mm512_minmax_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
#define _mm_minmax_ph(A, B, C) \
((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
(__v8hf) (B), \
@@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ph (), \
(__mmask16) (U)))
+#define _mm512_minmax_ph(A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ (int) (R)))
+
#define _mm_minmax_ps(A, B, C) \
((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
(__v4sf) (B), \
@@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ps (), \
(__mmask8) (U)))
+#define _mm512_minmax_ps(A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
#define _mm_minmax_round_sd(A, B, C, R) \
((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
(__v2df) (B), \
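As a usage sketch (illustrative only, not part of the patch) of the new 512-bit MINMAX forms above. The function name, the -mavx10.2 option, and the operation selector value 0 are assumptions; the selector must be taken from the VMINMAX immediate encoding for the operation actually wanted.

#include <immintrin.h>

/* Merge-masking MINMAX with exceptions suppressed: lanes selected by m get
   the MINMAX result for selector 0 (placeholder value only), the other
   lanes keep src.  */
__m512d
minmax_masked (__m512d src, __mmask8 m, __m512d a, __m512d b)
{
  return _mm512_mask_minmax_round_pd (src, m, a, b, 0, _MM_FROUND_NO_EXC);
}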
diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h
index 78bcd72..c4fa19b 100644
--- a/gcc/config/i386/avx10_2satcvtintrin.h
+++ b/gcc/config/i386/avx10_2satcvtintrin.h
@@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A)
(__mmask8) __U);
}
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ipcvts_bf16_epi8 (__m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_undefined_si256 (),
- (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
-{
- return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_setzero_si256 (),
- (__mmask16) __U);
-}
-
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvts_bf16_epu8 (__m128bh __A)
@@ -127,6 +96,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ipcvts_bf16_epi8 (__m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
+{
+ return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ipcvts_bf16_epu8 (__m256bh __A)
{
return
@@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epi8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epu8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epi8 (__m128 __A)
+_mm512_ipcvts_bf16_epi8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epu8 (__m128 __A)
+_mm512_ipcvts_bf16_epu8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
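A minimal sketch of how the 512-bit bf16 saturating-convert intrinsics above might be used (illustrative only; element semantics follow the AVX10.2 SAT-CVT specification, and suitable target options are assumed):

#include <immintrin.h>

__m512i
ipcvts_bf16_demo (__m512bh a, __mmask32 m)
{
  /* Unmasked signed and unsigned saturating conversions.  */
  __m512i s = _mm512_ipcvts_bf16_epi8 (a);
  __m512i u = _mm512_ipcvts_bf16_epu8 (a);
  /* Zero-masked variant: elements whose mask bit is clear become 0.  */
  __m512i z = _mm512_maskz_ipcvts_bf16_epi8 (m, a);
  return _mm512_or_si512 (_mm512_xor_si512 (s, u), z);
}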
extern __inline __m128i
@@ -390,6 +336,183 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epi8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epu8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epi8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epu8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvtts_ph_epi8 (__m128h __A)
@@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A)
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epi8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epu8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epi8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epu8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epi8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epu8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epi8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epu8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
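A minimal sketch of the truncating ("tts") 512-bit variants above, again illustrative only and assuming suitable target options:

#include <immintrin.h>

__m512i
ipcvtts_demo (__m512h ph, __m512 ps, __mmask32 m32, __mmask16 m16)
{
  /* Zero-masked truncating, saturating conversions from _Float16 and
     float vectors; elements whose mask bit is clear become 0.  */
  __m512i a = _mm512_maskz_ipcvtts_ph_epi8 (m32, ph);
  __m512i b = _mm512_maskz_ipcvtts_ps_epu8 (m16, ps);
  return _mm512_or_si512 (a, b);
}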
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
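A minimal sketch of the 512-bit double-to-integer truncating-saturating conversions above (illustrative only; assumes suitable target options):

#include <immintrin.h>

__m256i
cvtts_pd_demo (__m512d a, __m256i src, __mmask8 m)
{
  /* Truncating, saturating conversion of eight doubles to 32-bit
     integers; the merge-masked form keeps SRC where the mask bit is
     clear.  */
  __m256i all = _mm512_cvtts_pd_epi32 (a);
  __m256i sel = _mm512_mask_cvtts_pd_epi32 (src, m, a);
  return _mm256_xor_si256 (all, sel);
}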
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_ipcvts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+#endif
+
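A minimal sketch of the explicit-rounding forms declared above (illustrative only; the rounding/SAE constants shown are typical arguments drawn from the standard _MM_FROUND_* set, not a requirement of these intrinsics):

#include <immintrin.h>

__m512i
round_forms_demo (__m512h ph, __m512 ps)
{
  /* Explicit rounding for the saturating _Float16 conversion and
     suppress-all-exceptions for the truncating float conversion.  */
  __m512i a = _mm512_ipcvts_roundph_epi8 (ph, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);
  __m512i b = _mm512_cvtts_roundps_epi32 (ps, _MM_FROUND_NO_EXC);
  return _mm512_xor_si512 (a, b);
}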
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtts_sd_epi32 (__m128d __A)
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 6740109..6c087e6 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
-#if !defined (__AVX512BF16__) || defined (__EVEX512__)
+#if !defined (__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,no-evex512")
+#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
@@ -42,17 +42,6 @@ _mm_cvtsbh_ss (__bf16 __A)
return __builtin_ia32_cvtbf2sf (__A);
}
-#ifdef __DISABLE_AVX512BF16__
-#undef __DISABLE_AVX512BF16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BF16__ */
-
-#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bf16,evex512")
-#define __DISABLE_AVX512BF16_512__
-#endif /* __AVX512BF16_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
@@ -155,8 +144,8 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
}
-#ifdef __DISABLE_AVX512BF16_512__
-#undef __DISABLE_AVX512BF16_512__
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16_512__ */
diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h
index ffaceac..fd6d183 100644
--- a/gcc/config/i386/avx512bf16vlintrin.h
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,avx512vl,no-evex512")
+#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* __AVX512BF16__ */
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
index 301f125..d7156f9 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
#define _AVX512BITALGINTRIN_H_INCLUDED
-#if !defined (__AVX512BITALG__) || !defined (__EVEX512__)
+#if !defined (__AVX512BITALG__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,evex512")
+#pragma GCC target("avx512bitalg")
#define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */
diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
index e4883cf..cf9cff6 100644
--- a/gcc/config/i386/avx512bitalgvlintrin.h
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED
#define _AVX512BITALGVLINTRIN_H_INCLUDED
-#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || defined (__EVEX512__)
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512vl,no-evex512")
+#pragma GCC target("avx512bitalg,avx512vl")
#define __DISABLE_AVX512BITALGVL__
#endif /* __AVX512BITALGVL__ */
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 47c4c03..5e9eeaa 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BWINTRIN_H_INCLUDED
#define _AVX512BWINTRIN_H_INCLUDED
-#if !defined (__AVX512BW__) || defined (__EVEX512__)
+#if !defined (__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512bw,no-evex512")
+#pragma GCC target("avx512bw")
#define __DISABLE_AVX512BW__
#endif /* __AVX512BW__ */
@@ -346,17 +346,6 @@ _kandn_mask64 (__mmask64 __A, __mmask64 __B)
return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B);
}
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
-
-#if !defined (__AVX512BW__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bw,evex512")
-#define __DISABLE_AVX512BW_512__
-#endif /* __AVX512BW_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
@@ -3369,8 +3358,8 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
#endif
-#ifdef __DISABLE_AVX512BW_512__
-#undef __DISABLE_AVX512BW_512__
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BW_512__ */
diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h
index 206cc49..5a92d25 100644
--- a/gcc/config/i386/avx512cdintrin.h
+++ b/gcc/config/i386/avx512cdintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX512CD__
#pragma GCC push_options
-#pragma GCC target("avx512cd,evex512")
+#pragma GCC target("avx512cd")
#define __DISABLE_AVX512CD__
#endif /* __AVX512CD__ */
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 1d10225..a7766b5 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512DQINTRIN_H_INCLUDED
#define _AVX512DQINTRIN_H_INCLUDED
-#if !defined (__AVX512DQ__) || defined (__EVEX512__)
+#if !defined (__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512dq,no-evex512")
+#pragma GCC target("avx512dq")
#define __DISABLE_AVX512DQ__
#endif /* __AVX512DQ__ */
@@ -639,17 +639,6 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ__
-#undef __DISABLE_AVX512DQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ__ */
-
-#if !defined (__AVX512DQ__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512dq,evex512")
-#define __DISABLE_AVX512DQ_512__
-#endif /* __AVX512DQ_512__ */
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x2 (__m128d __A)
@@ -2897,9 +2886,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ_512__
-#undef __DISABLE_AVX512DQ_512__
+#ifdef __DISABLE_AVX512DQ__
+#undef __DISABLE_AVX512DQ__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ_512__ */
+#endif /* __DISABLE_AVX512DQ__ */
#endif /* _AVX512DQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 9160787..4469f73 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED
-#if !defined (__AVX512F__) || defined (__EVEX512__)
+#if !defined (__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("avx512f,no-evex512")
+#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
@@ -54,11 +54,12 @@ typedef enum
_MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
-/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
- from AVX2 or before. We need to add them to prevent target option mismatch
- when calling AVX512 intrins implemented with these intrins under no-evex512
- function attribute. All AVX512 intrins calling those AVX2 intrins or
- before will change their calls to these AVX512 version. */
+/* These _mm{,256}_avx512* intrins were initially duplicated from their
+   _mm{,256}_* forms from AVX2 or before.  At that time, we needed them to
+   prevent target option mismatches when calling AVX512 intrins implemented
+   with these intrins under the no-evex512 function attribute.  These
+   intrins are kept here to avoid huge changes.  All AVX512 intrins that
+   called AVX2-or-earlier intrins now call these AVX512 versions instead.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_undefined_ps (void)
{
@@ -3802,17 +3803,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
#endif
-#ifdef __DISABLE_AVX512F__
-#undef __DISABLE_AVX512F__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F__ */
-
-#if !defined (__AVX512F__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512f,evex512")
-#define __DISABLE_AVX512F_512__
-#endif /* __AVX512F_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
@@ -16609,9 +16599,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
#undef __MM512_REDUCE_OP
-#ifdef __DISABLE_AVX512F_512__
-#undef __DISABLE_AVX512F_512__
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F_512__ */
+#endif /* __DISABLE_AVX512F__ */
#endif /* _AVX512FINTRIN_H_INCLUDED */
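
For context on the _mm{,256}_avx512* helpers mentioned in the comment earlier
in this file: they are local copies of pre-AVX512 intrinsics, kept so that the
AVX512 bodies never call a function compiled with a narrower target. A rough
sketch of one such helper, assuming it mirrors _mm_undefined_ps (the actual
bodies sit outside the hunks shown here):

    extern __inline __m128
    __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    _mm_avx512_undefined_ps (void)
    {
      /* The self-initialization intentionally leaves the value
         undefined; the pragmas silence -Winit-self.  */
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Winit-self"
      __m128 __Y = __Y;
    #pragma GCC diagnostic pop
      return __Y;
    }
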
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index f158f87..471ec05 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FP16INTRIN_H_INCLUDED
#define _AVX512FP16INTRIN_H_INCLUDED
-#if !defined (__AVX512FP16__) || defined (__EVEX512__)
+#if !defined (__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,no-evex512")
+#pragma GCC target("avx512fp16")
#define __DISABLE_AVX512FP16__
#endif /* __AVX512FP16__ */
@@ -2852,17 +2852,6 @@ _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
#define _mm_maskz_cmul_round_sch(U, A, B, R) \
_mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16__
-#undef __DISABLE_AVX512FP16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16__ */
-
-#if !defined (__AVX512FP16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512fp16,evex512")
-#define __DISABLE_AVX512FP16_512__
-#endif /* __AVX512FP16_512__ */
-
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
@@ -7238,9 +7227,9 @@ _mm512_set1_pch (_Float16 _Complex __A)
#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
_mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16_512__
-#undef __DISABLE_AVX512FP16_512__
+#ifdef __DISABLE_AVX512FP16__
+#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16_512__ */
+#endif /* __DISABLE_AVX512FP16__ */
#endif /* _AVX512FP16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 59e6c88..cb98310 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,avx512vl,no-evex512")
+#pragma GCC target("avx512fp16,avx512vl")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h
index ed97350..56790c0 100644
--- a/gcc/config/i386/avx512ifmaintrin.h
+++ b/gcc/config/i386/avx512ifmaintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED
-#if !defined (__AVX512IFMA__) || !defined (__EVEX512__)
+#if !defined (__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,evex512")
+#pragma GCC target("avx512ifma")
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */
diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h
index 681bda3..6b849c8 100644
--- a/gcc/config/i386/avx512ifmavlintrin.h
+++ b/gcc/config/i386/avx512ifmavlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
#define _AVX512IFMAVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,avx512vl,no-evex512")
+#pragma GCC target("avx512ifma,avx512vl")
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */
diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index f5515a8..e8bfe1d 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
#define __AVX512VBMI2INTRIN_H_INCLUDED
-#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__)
+#if !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,evex512")
+#pragma GCC target("avx512vbmi2")
#define __DISABLE_AVX512VBMI2__
#endif /* __AVX512VBMI2__ */
diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h
index e9857ba..5cdfebd 100644
--- a/gcc/config/i386/avx512vbmi2vlintrin.h
+++ b/gcc/config/i386/avx512vbmi2vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi2,avx512vl")
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h
index 901a2f7..5f5e342 100644
--- a/gcc/config/i386/avx512vbmiintrin.h
+++ b/gcc/config/i386/avx512vbmiintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED
-#if !defined (__AVX512VBMI__) || !defined (__EVEX512__)
+#if !defined (__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,evex512")
+#pragma GCC target("avx512vbmi")
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h
index 90cd590..037ea93 100644
--- a/gcc/config/i386/avx512vbmivlintrin.h
+++ b/gcc/config/i386/avx512vbmivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 9f0a5b4..537e408 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLBWINTRIN_H_INCLUDED
#define _AVX512VLBWINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512bw,no-evex512")
+#pragma GCC target("avx512vl,avx512bw")
#define __DISABLE_AVX512VLBW__
#endif /* __AVX512VLBW__ */
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
index 3b23d4a..5783dbe 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLDQINTRIN_H_INCLUDED
#define _AVX512VLDQINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq,no-evex512")
+#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* __AVX512VLDQ__ */
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 4451a1f..50930cd 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED
-#if !defined (__AVX512VL__) || defined (__EVEX512__)
+#if !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,no-evex512")
+#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */
@@ -13650,7 +13650,7 @@ _mm256_permutex_pd (__m256d __X, const int __M)
#if !defined (__AVX512CD__) || !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512cd,no-evex512")
+#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif
diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h
index 5d0eaff..fe7b663 100644
--- a/gcc/config/i386/avx512vnniintrin.h
+++ b/gcc/config/i386/avx512vnniintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED
-#if !defined(__AVX512VNNI__) || !defined (__EVEX512__)
+#if !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,evex512")
+#pragma GCC target("avx512vnni")
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h
index 7774bbd..01c3c91 100644
--- a/gcc/config/i386/avx512vnnivlintrin.h
+++ b/gcc/config/i386/avx512vnnivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
#define _AVX512VNNIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,avx512vl,no-evex512")
+#pragma GCC target("avx512vnni,avx512vl")
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index e170cf5..50f7ead 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,evex512")
+#pragma GCC target("avx512vp2intersect")
#define __DISABLE_AVX512VP2INTERSECT__
#endif /* __AVX512VP2INTERSECT__ */
diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h
index afdd2da..3e0a8ab 100644
--- a/gcc/config/i386/avx512vp2intersectvlintrin.h
+++ b/gcc/config/i386/avx512vp2intersectvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,avx512vl,no-evex512")
+#pragma GCC target("avx512vp2intersect,avx512vl")
#define __DISABLE_AVX512VP2INTERSECTVL__
#endif /* __AVX512VP2INTERSECTVL__ */
diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h
index 3357255..e4b89ea 100644
--- a/gcc/config/i386/avx512vpopcntdqintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
-#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__)
+#if !defined (__AVX512VPOPCNTDQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,evex512")
+#pragma GCC target("avx512vpopcntdq")
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
index 17d836f..8eb1d42 100644
--- a/gcc/config/i386/avx512vpopcntdqvlintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,avx512vl,no-evex512")
+#pragma GCC target("avx512vpopcntdq,avx512vl")
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 3ddcbec..0a3173c 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_SEH
#define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables)
+#undef PREFERRED_STACK_BOUNDARY_DEFAULT
+#define PREFERRED_STACK_BOUNDARY_DEFAULT \
+ (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY)
+
/* Win64 with SEH cannot represent DRAP stack frames. Disable its use.
Force the use of different mechanisms to allocate aligned local data. */
#undef MAX_STACK_ALIGNMENT
#define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT)
-/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions
- may require 16-byte alignment. */
-#undef STACK_REALIGN_DEFAULT
-#define STACK_REALIGN_DEFAULT TARGET_SSE
-
/* Support hooks for SEH. */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit
@@ -247,9 +246,10 @@ do { \
#undef ASM_OUTPUT_LABELREF
#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
do { \
+ const char *prefix = ""; \
if ((NAME)[0] != FASTCALL_PREFIX) \
- fputs (user_label_prefix, (STREAM)); \
- fputs ((NAME), (STREAM)); \
+ prefix = user_label_prefix; \
+ ix86_asm_output_labelref ((STREAM), prefix, (NAME)); \
} while (0)
/* This does much the same in memory rather than to a stream. */
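
The ASM_OUTPUT_LABELREF change above stops writing the label directly and
instead hands the prefix and name to ix86_asm_output_labelref. The real
definition of that hook is not part of the hunks shown here; a purely
hypothetical minimal version, with the argument order taken from the call
site, would be:

    /* Hypothetical sketch only -- emit the chosen prefix, then the name.  */
    void
    ix86_asm_output_labelref (FILE *stream, const char *prefix, const char *name)
    {
      fputs (prefix, stream);
      fputs (name, stream);
    }
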
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 1ff05e5..63c7d79 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -374,33 +374,6 @@ detect_caches_intel (bool xeon_mp, unsigned max_level,
#define has_feature(f) \
has_cpu_feature (&cpu_model, cpu_features2, f)
-/* We will emit a warning when using AVX10.1 and AVX512 options with one
- enabled and the other disabled. Add this function to avoid push "-mno-"
- options under this scenario for -march=native. */
-
-bool check_avx512_features (__processor_model &cpu_model,
- unsigned int (&cpu_features2)[SIZE_OF_CPU_FEATURES],
- const enum processor_features feature)
-{
- if (has_feature (FEATURE_AVX10_1_256)
- && ((feature == FEATURE_AVX512F)
- || (feature == FEATURE_AVX512CD)
- || (feature == FEATURE_AVX512DQ)
- || (feature == FEATURE_AVX512BW)
- || (feature == FEATURE_AVX512VL)
- || (feature == FEATURE_AVX512IFMA)
- || (feature == FEATURE_AVX512VBMI)
- || (feature == FEATURE_AVX512VBMI2)
- || (feature == FEATURE_AVX512VNNI)
- || (feature == FEATURE_AVX512VPOPCNTDQ)
- || (feature == FEATURE_AVX512BITALG)
- || (feature == FEATURE_AVX512FP16)
- || (feature == FEATURE_AVX512BF16)))
- return false;
-
- return true;
-}
-
/* This will be called by the spec parser in gcc.cc when it sees
a %:local_cpu_detect(args) construct. Currently it will be
called with either "arch [32|64]" or "tune [32|64]" as argument
@@ -909,12 +882,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, " ",
isa_names_table[i].option, NULL);
}
- /* Never push -mno-avx10.1-{256,512} under -march=native to
- avoid unnecessary warnings when building libraries. */
- else if (isa_names_table[i].feature != FEATURE_AVX10_1_256
- && isa_names_table[i].feature != FEATURE_AVX10_1
- && check_avx512_features (cpu_model, cpu_features2,
- isa_names_table[i].feature))
+ else
options = concat (options, neg_option,
isa_names_table[i].option + 2, NULL);
}
diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile
index 528b34e..0e9e5fe 100755
--- a/gcc/config/i386/gcc-auto-profile
+++ b/gcc/config/i386/gcc-auto-profile
@@ -24,8 +24,16 @@ if [ "$1" = "--all" ] ; then
shift
fi
-if ! grep -q Intel /proc/cpuinfo ; then
- echo >&2 "Only Intel CPUs supported"
+if grep -q AuthenticAMD /proc/cpuinfo ; then
+ vendor=AMD
+ if ! grep -q " brs" /proc/cpuinfo && ! grep -q amd_lbr_v2 /proc/cpuinfo ; then
+ echo >&2 "AMD CPU with brs (Zen 3) or amd_lbr_v2 (Zen 4+) feature is required"
+ exit 1
+ fi
+elif grep -q Intel /proc/cpuinfo ; then
+ vendor=Intel
+else
+ echo >&2 "Only AMD and Intel CPUs supported"
exit 1
fi
@@ -33,7 +41,7 @@ if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi
-case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
+case `test $vendor = Intel && grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
grep -E "^model\s*:" /proc/cpuinfo | head -n1` in
model*:\ 46|\
model*:\ 30|\
@@ -82,6 +90,8 @@ model*:\ 126|\
model*:\ 167|\
model*:\ 140|\
model*:\ 141|\
+model*:\ 143|\
+model*:\ 207|\
model*:\ 106|\
model*:\ 108|\
model*:\ 173|\
@@ -89,15 +99,20 @@ model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;;
model*:\ 134|\
model*:\ 150|\
model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;;
-model*:\ 143|\
-model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;;
-model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
+model*:\ 190|\
+model*:\ 175|\
+model*:\ 182) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;;
*)
if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; then
E=br_inst_retired.near_taken:p
+ elif perf list ex_ret_brn_tkn | grep -q ex_ret_brn_tkn ; then
+ E=ex_ret_brn_tkn:P$FLAGS
+ elif test $vendor = Intel ; then
+echo >&2 "Unknown Intel CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+ exit 1
else
-echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+echo >&2 "AMD CPU without support for ex_ret_brn_tkn event"
exit 1
fi ;;
esac
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index c7e21e6..bc433c2 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -297,9 +297,9 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__)
+#if !defined(__GFNI__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512f,evex512")
+#pragma GCC target("gfni,avx512f")
#define __DISABLE_GFNIAVX512F__
#endif /* __GFNIAVX512F__ */
@@ -341,9 +341,9 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
#pragma GCC pop_options
#endif /* __GFNIAVX512F__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__)
+#if !defined(__GFNI__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512bw,evex512")
+#pragma GCC target("gfni,avx512bw")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */
diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index e083f49..87804a5 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
and earlier, backslashes are invalid in object name. So, we need
to check if we are on Windows2000 or higher. */
OSVERSIONINFO version_info;
- int r;
version_info.dwOSVersionInfoSize = sizeof (version_info);
@@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
return -1;
}
- /* Retry five times, as here might occure a race with multiple gcc's
- instances at same time. */
- for (r = 0; r < 5; r++)
- {
- mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
- size, addr);
- if (mmap_addr == addr)
- break;
- if (r != 4)
- Sleep (500);
- }
-
- if (mmap_addr != addr)
+ /* Try mapping the file at `addr`. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, addr);
+ if (mmap_addr == NULL)
{
- w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
- CloseHandle(mmap_handle);
- return -1;
+ /* We could not map the file at its original address, so let the
+ system choose a different one. The PCH can be relocated later. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, NULL);
+ if (mmap_addr == NULL)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return -1;
+ }
}
+ addr = mmap_addr;
return 1;
}
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a142711..fe42c6436 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -204,53 +204,53 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstored256, "__builtin_ia32_mas
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI)
@@ -297,14 +297,14 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si,
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT)
/* AVX512BW */
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
/* AVX512VP2INTERSECT */
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI)
@@ -411,9 +411,9 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
/* AVX512FP16 */
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
@@ -434,17 +434,17 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
@@ -1384,230 +1384,230 @@ BDESC (OPTION_MASK_ISA_BMI2, 0, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si"
BDESC (OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
/* Mask arithmetic operations */
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
@@ -2433,136 +2433,136 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
/* AVX512BW. */
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
/* AVX512IFMA */
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2577,13 +2577,13 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
/* AVX512VBMI */
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
@@ -2594,16 +2594,16 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
@@ -2612,64 +2612,64 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi_mask, "__builtin_ia32_vpshld_v8hi_mask", IX86_BUILTIN_VPSHLDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si, "__builtin_ia32_vpshld_v8si", IX86_BUILTIN_VPSHLDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si_mask, "__builtin_ia32_vpshld_v8si_mask", IX86_BUILTIN_VPSHLDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si, "__builtin_ia32_vpshld_v4si", IX86_BUILTIN_VPSHLDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si_mask, "__builtin_ia32_vpshld_v4si_mask", IX86_BUILTIN_VPSHLDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di, "__builtin_ia32_vpshld_v4di", IX86_BUILTIN_VPSHLDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di_mask, "__builtin_ia32_vpshld_v4di_mask", IX86_BUILTIN_VPSHLDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di, "__builtin_ia32_vpshld_v2di", IX86_BUILTIN_VPSHLDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2677,27 +2677,27 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2706,20 +2706,20 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
/* GFNI */
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineinvqb_v32qi, "__builtin_ia32_vgf2p8affineinvqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v32qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineinvqb_v16qi, "__builtin_ia32_vgf2p8affineinvqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineinvqb_v16qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineqb_v32qi, "__builtin_ia32_vgf2p8affineqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v32qi_mask, "__builtin_ia32_vgf2p8affineqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineqb_v16qi, "__builtin_ia32_vgf2p8affineqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineqb_v16qi_mask, "__builtin_ia32_vgf2p8affineqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8mulb_v32qi, "__builtin_ia32_vgf2p8mulb_v32qi", IX86_BUILTIN_VGF2P8MULB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v32qi_mask, "__builtin_ia32_vgf2p8mulb_v32qi_mask", IX86_BUILTIN_VGF2P8MULB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
@@ -2727,9 +2727,9 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v
/* AVX512_VNNI */
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2737,9 +2737,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2747,9 +2747,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2757,9 +2757,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2798,13 +2798,13 @@ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16 | OPTION_MASK_ISA2_AVX10_2, CODE_FOR_vpd
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
-BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
/* VPOPCNTDQ */
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
@@ -2816,21 +2816,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
/* BITALG */
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI)
@@ -2840,39 +2840,39 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B
/* VAES. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
/* BF16 */
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf, "__builtin_ia32_cvtne2ps2bf16_v16bf", IX86_BUILTIN_CVTNE2PS2BF16_V16BF, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_mask, "__builtin_ia32_cvtne2ps2bf16_v16bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASK, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_V16BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v16bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf, "__builtin_ia32_cvtne2ps2bf16_v8bf", IX86_BUILTIN_CVTNE2PS2BF16_V8BF, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_mask, "__builtin_ia32_cvtne2ps2bf16_v8bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v8bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2BF16_V8SF, UNKNOWN, (int) V8BF_FTYPE_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V8SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V8SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2BF16_V4SF, UNKNOWN, (int) V8BF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V4SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V4SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPBF16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPBF16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPBF16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
@@ -2885,40 +2885,40 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_extendbfsf2_1, "__builtin_ia32_cvtbf2sf
/* AVX512FP16. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
@@ -2928,7 +2928,7 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
@@ -3366,26 +3366,26 @@ BDESC_END (ARGS, ROUND_ARGS)
/* AVX512F. */
BDESC_FIRST (round_args, ROUND_ARGS,
- OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+ OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT)
@@ -3393,72 +3393,72 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd_mask_round", IX86_BUILTIN_GETEXPSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss_mask_round", IX86_BUILTIN_GETEXPSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_mask_round, "__builtin_ia32_getmantsd_mask_round", IX86_BUILTIN_GETMANTSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_mask_round, "__builtin_ia32_getmantss_mask_round", IX86_BUILTIN_GETMANTSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
@@ -3479,12 +3479,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_sse_cvttss2si_round, "__built
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
@@ -3495,100 +3495,100 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
/* AVX512FP16. */
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
@@ -3601,32 +3601,32 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
@@ -3634,18 +3634,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fma_fcmaddcsh_v8hf_round, "__builtin_ia32_vfcmaddcsh_round", IX86_BUILTIN_VFCMADDCSH_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask1_round, "__builtin_ia32_vfcmaddcsh_mask_round", IX86_BUILTIN_VFCMADDCSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask3_round, "__builtin_ia32_vfcmaddcsh_mask3_round", IX86_BUILTIN_VFCMADDCSH_MASK3_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 2e7381b..4835b94 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -801,102 +801,102 @@ ix86_init_mmx_sse_builtins (void)
IX86_BUILTIN_GATHERALTDIV8SI);
/* AVX512F */
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16sf",
V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16sf",
V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16si",
V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16si",
V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8df ",
V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16sf ",
V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8di ",
V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16si ",
V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16sf",
VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
IX86_BUILTIN_SCATTERSIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8df",
VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
IX86_BUILTIN_SCATTERSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16sf",
VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
IX86_BUILTIN_SCATTERDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8df",
VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
IX86_BUILTIN_SCATTERDIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16si",
VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
IX86_BUILTIN_SCATTERSIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8di",
VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
IX86_BUILTIN_SCATTERSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16si",
VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
IX86_BUILTIN_SCATTERDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8di",
VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
IX86_BUILTIN_SCATTERDIV8DI);
@@ -1046,22 +1046,22 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
IX86_BUILTIN_SCATTERDIV2DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8df ",
VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
IX86_BUILTIN_SCATTERALTSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16sf ",
VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
IX86_BUILTIN_SCATTERALTDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8di ",
VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
IX86_BUILTIN_SCATTERALTSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16si ",
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
IX86_BUILTIN_SCATTERALTDIV16SI);
@@ -1676,7 +1676,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
enum ix86_builtins code;
const machine_mode mode = TYPE_MODE (TREE_TYPE (mem_vectype));
- if ((!TARGET_AVX512F || !TARGET_EVEX512) && GET_MODE_SIZE (mode) == 64)
+ if (!TARGET_AVX512F && GET_MODE_SIZE (mode) == 64)
return NULL_TREE;
if (! TARGET_AVX2
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 0a320ca..457aa05 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -729,12 +729,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SHA512__");
if (isa_flag2 & OPTION_MASK_ISA2_SM4)
def_or_undef (parse_in, "__SM4__");
- if (isa_flag2 & OPTION_MASK_ISA2_EVEX512)
- def_or_undef (parse_in, "__EVEX512__");
if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR)
def_or_undef (parse_in, "__USER_MSR__");
- if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256)
- def_or_undef (parse_in, "__AVX10_1_256__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1)
def_or_undef (parse_in, "__AVX10_1__");
if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..423fc63 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
too common scenario. */
start_sequence ();
compare_op = ix86_expand_fp_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
@@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[])
start_sequence ();
compare_op = ix86_expand_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
compare_code = GET_CODE (compare_op);
@@ -3611,7 +3609,11 @@ ix86_expand_int_movcc (rtx operands[])
negate_cc_compare_p = true;
}
- diff = ct - cf;
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
/* Sign bit compares are better done using shifts than we do by using
sbb. */
if (sign_bit_compare_p
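The overflow guard added in this and the following hunks always has the same shape; here is a standalone sketch of the test (the helper name and the int64_t types are purely illustrative, the patch itself works on HOST_WIDE_INT):

#include <stdbool.h>
#include <stdint.h>

/* Compute ct - cf in unsigned arithmetic (no undefined behaviour; GCC
   defines the conversion back to signed as wrapping), then reject the
   result when its sign disagrees with the sign the exact difference must
   have, i.e. when ct - cf is not representable.  */
static bool
representable_difference_p (int64_t ct, int64_t cf, int64_t *pdiff)
{
  int64_t diff = (uint64_t) ct - cf;
  /* Mixed signs: the exact difference is positive iff cf is the negative
     operand.  Equal signs: overflow is impossible, positive iff cf < ct.  */
  bool expect_positive = (cf < 0) != (ct < 0) ? cf < 0 : cf < ct;
  if ((diff > 0) != expect_positive)
    return false;
  *pdiff = diff;
  return true;
}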
@@ -3669,7 +3671,12 @@ ix86_expand_int_movcc (rtx operands[])
PUT_CODE (compare_op,
reverse_condition (GET_CODE (compare_op)));
}
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
if (reg_overlap_mentioned_p (out, compare_op))
tmp = gen_reg_rtx (mode);
@@ -3687,7 +3694,12 @@ ix86_expand_int_movcc (rtx operands[])
else
{
std::swap (ct, cf);
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
}
tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
}
@@ -3754,9 +3766,15 @@ ix86_expand_int_movcc (rtx operands[])
tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
tmp = expand_simple_binop (mode, AND,
copy_rtx (tmp),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (tmp), 1, OPTAB_DIRECT);
if (ct)
tmp = expand_simple_binop (mode, PLUS,
@@ -3793,7 +3811,13 @@ ix86_expand_int_movcc (rtx operands[])
if (new_code != UNKNOWN)
{
std::swap (ct, cf);
- diff = -diff;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
code = new_code;
}
}
@@ -3996,8 +4020,14 @@ ix86_expand_int_movcc (rtx operands[])
copy_rtx (out), 1, OPTAB_DIRECT);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
out = expand_simple_binop (mode, AND, copy_rtx (out),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (out), 1, OPTAB_DIRECT);
if (ct)
out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
@@ -4138,6 +4168,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return false;
mode = GET_MODE (dest);
+ if (immediate_operand (if_false, mode))
+ if_false = force_reg (mode, if_false);
+ if (immediate_operand (if_true, mode))
+ if_true = force_reg (mode, if_true);
/* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
but MODE may be a vector mode and thus not appropriate. */
@@ -4186,7 +4220,7 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW)
return false;
- return (vector_size == 64 && TARGET_EVEX512) || TARGET_AVX512VL;
+ return vector_size == 64 || TARGET_AVX512VL;
}
/* Return true if integer mask comparison should be used. */
@@ -4687,6 +4721,8 @@ ix86_expand_fp_movcc (rtx operands[])
compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
+ operands[2] = force_reg (mode, operands[2]);
+ operands[3] = force_reg (mode, operands[3]);
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
@@ -5022,7 +5058,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
/* Don't do it if not using integer masks and we'd end up with
the right values in the registers though. */
- && ((GET_MODE_SIZE (mode) == 64 && TARGET_EVEX512)
+ && (GET_MODE_SIZE (mode) == 64
|| !vector_all_ones_operand (optrue, data_mode)
|| opfalse != CONST0_RTX (data_mode))))
{
@@ -8901,31 +8937,34 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
+alg_usable_p (enum stringop_alg alg, bool memset,
+ addr_space_t dst_as, addr_space_t src_as)
{
if (alg == no_stringop)
return false;
/* It is not possible to use a library call if we have non-default
address space. We can do better than the generic byte-at-a-time
loop, used as a fallback. */
- if (alg == libcall && have_as)
+  if (alg == libcall
+      && !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as)))
return false;
if (alg == vector_loop)
return TARGET_SSE || TARGET_AVX;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
- registers for their own purposes, or if we have a non-default
- address space, since some string insns cannot override the segment. */
+ registers for their own purposes, or if we have the destination
+ in the non-default address space, since string insns cannot
+ override the destination segment. */
if (alg == rep_prefix_1_byte
|| alg == rep_prefix_4_byte
|| alg == rep_prefix_8_byte)
{
- if (have_as)
- return false;
if (fixed_regs[CX_REG]
|| fixed_regs[DI_REG]
- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
+ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])
+ || !ADDR_SPACE_GENERIC_P (dst_as)
+ || !(ADDR_SPACE_GENERIC_P (src_as) || Pmode == word_mode))
return false;
}
return true;
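A C-level sketch of the asymmetry the new dst_as/src_as parameters encode (this relies on GCC's x86 named address spaces, a C-only extension, and is only an illustration; which strategy is actually chosen still depends on the copy size, tuning and the rest of decide_alg):

/* Copying FROM %gs-qualified memory: only the source segment differs, and
   movs accepts a segment override on the source (when Pmode == word_mode),
   so the rep-prefix algorithms remain usable.  Copying TO __seg_gs memory
   can use neither rep movs/stos nor a libcall and typically falls back to
   a loop.  */
struct blob { char b[256]; };

void
copy_from_gs (struct blob *dst, const struct blob __seg_gs *src)
{
  *dst = *src;
}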
@@ -8935,8 +8974,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, bool have_as,
- int *dynamic_check, bool *noalign, bool recur)
+ bool memset, bool zero_memset, addr_space_t dst_as,
+ addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur)
{
const struct stringop_algs *algs;
bool optimize_for_speed;
@@ -8968,7 +9007,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
{
enum stringop_alg candidate = algs->size[i].alg;
- bool usable = alg_usable_p (candidate, memset, have_as);
+ bool usable = alg_usable_p (candidate, memset, dst_as, src_as);
any_alg_usable_p |= usable;
if (candidate != libcall && candidate && usable)
@@ -8984,17 +9023,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If user specified the algorithm, honor it if possible. */
if (ix86_stringop_alg != no_stringop
- && alg_usable_p (ix86_stringop_alg, memset, have_as))
+ && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as))
return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (!optimize_for_speed)
{
*noalign = true;
if (!count || (count & 3) || (memset && !zero_memset))
- return alg_usable_p (rep_prefix_1_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as)
? rep_prefix_1_byte : loop_1_byte;
else
- return alg_usable_p (rep_prefix_4_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as)
? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to
@@ -9018,7 +9057,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
enum stringop_alg candidate = algs->size[i].alg;
if (candidate != libcall
- && alg_usable_p (candidate, memset, have_as))
+ && alg_usable_p (candidate, memset, dst_as, src_as))
{
alg = candidate;
alg_noalign = algs->size[i].noalign;
@@ -9038,7 +9077,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
else if (!any_alg_usable_p)
break;
}
- else if (alg_usable_p (candidate, memset, have_as)
+ else if (alg_usable_p (candidate, memset, dst_as, src_as)
&& !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB
&& candidate == rep_prefix_1_byte
/* NB: If min_size != max_size, size is
@@ -9060,7 +9099,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
&& (algs->unknown_size == libcall
- || !alg_usable_p (algs->unknown_size, memset, have_as)))
+ || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as)))
{
enum stringop_alg alg;
HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
@@ -9075,8 +9114,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*dynamic_check = 128;
return loop_1_byte;
}
- alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
- zero_memset, have_as, dynamic_check, noalign, true);
+ alg = decide_alg (count, new_expected_size, min_size, max_size,
+ memset, zero_memset, dst_as, src_as,
+ dynamic_check, noalign, true);
gcc_assert (*dynamic_check == -1);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
@@ -9088,7 +9128,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* Try to use some reasonable fallback algorithm. Note that for
non-default address spaces we default to a loop instead of
a libcall. */
- return (alg_usable_p (algs->unknown_size, memset, have_as)
+
+ bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as)
+ && ADDR_SPACE_GENERIC_P (src_as));
+
+ return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as)
? algs->unknown_size : have_as ? loop : libcall);
}
@@ -9307,14 +9351,13 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
bool need_zero_guard = false;
bool noalign;
machine_mode move_mode = VOIDmode;
- machine_mode wider_mode;
int unroll_factor = 1;
/* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
bool misaligned_prologue_used = false;
- bool have_as;
+ addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
@@ -9352,16 +9395,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
if (count > (HOST_WIDE_INT_1U << 30))
return false;
- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
+ dst_as = MEM_ADDR_SPACE (dst);
if (!issetmem)
- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
+ src_as = MEM_ADDR_SPACE (src);
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
alg = decide_alg (count, expected_size, min_size, probable_max_size,
- issetmem,
- issetmem && val_exp == const0_rtx, have_as,
- &dynamic_check, &noalign, false);
+ issetmem, issetmem && val_exp == const0_rtx,
+ dst_as, src_as, &dynamic_check, &noalign, false);
if (dump_file)
fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
@@ -9384,6 +9426,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
unroll_factor = 1;
move_mode = word_mode;
+ int nunits;
switch (alg)
{
case libcall:
@@ -9404,27 +9447,14 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
case vector_loop:
need_zero_guard = true;
unroll_factor = 4;
- /* Find the widest supported mode. */
- move_mode = word_mode;
- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
- move_mode = wider_mode;
-
- if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
- move_mode = TImode;
- if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
- move_mode = OImode;
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
+ /* Get the vector mode to move MOVE_MAX bytes. */
+ nunits = MOVE_MAX / GET_MODE_SIZE (word_mode);
+ if (nunits > 1)
{
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
- move_mode = word_mode;
+ move_mode = mode_for_vector (word_mode, nunits).require ();
+ gcc_assert (optab_handler (mov_optab, move_mode)
+ != CODE_FOR_nothing);
}
- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
break;
case rep_prefix_8_byte:
move_mode = DImode;
@@ -11244,6 +11274,54 @@ fixup_modeless_constant (rtx x, machine_mode mode)
return x;
}
+/* Expand the outgoing argument ARG to extract unsigned char and short
+ integer constants suitable for the predicates and the instruction
+ templates which expect the unsigned expanded value. */
+
+static rtx
+ix86_expand_unsigned_small_int_cst_argument (tree arg)
+{
+ /* When passing 0xff as an unsigned char function argument with the
+ C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe6aa23a8 type <integer_type 0x7fffe98225e8 int> constant 255>
+
+ and returns the rtx value using the sign-extended representation:
+
+ (const_int 255 [0xff])
+
+ Without the C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe9824018 type <integer_type 0x7fffe9822348 unsigned char > constant 255>
+
+ and returns
+
+ (const_int -1 [0xffffffffffffffff])
+
+ which doesn't work with the predicates nor the instruction templates
+ which expect the unsigned expanded value. Extract the unsigned char
+ and short integer constants to return
+
+ (const_int 255 [0xff])
+
+ so that the expanded value is always unsigned, without the C frontend
+ promotion. */
+
+ if (TREE_CODE (arg) == INTEGER_CST)
+ {
+ tree type = TREE_TYPE (arg);
+ if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (type)
+ && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+ {
+ HOST_WIDE_INT cst = TREE_INT_CST_LOW (arg);
+ return GEN_INT (cst);
+ }
+ }
+
+ return expand_normal (arg);
+}
+
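A small host-side illustration of the representation issue described in the comment above; the point is that an RTL const_int stores the value sign-extended from the type's precision, so 0xff in an 8-bit unsigned type shows up as -1 unless it is re-extracted as unsigned (the program below is an assumed stand-in, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t promoted      = (int) 0xff;     /* 255: what the C frontend promotion yields */
  int64_t sign_extended = (int8_t) 0xff;  /* -1: the troublesome sign-extended form    */
  int64_t re_extracted  = (uint8_t) 0xff; /* 255: what the new helper hands to GEN_INT */
  printf ("%lld %lld %lld\n", (long long) promoted,
	  (long long) sign_extended, (long long) re_extracted);
  return 0;
}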
/* Subroutine of ix86_expand_builtin to take care of insns with
variable number of operands. */
@@ -12142,7 +12220,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
/* Need to fixup modeless constant before testing predicate. */
op = fixup_modeless_constant (op, mode);
@@ -12837,7 +12915,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
bool match = insn_p->operand[i + 1].predicate (op, mode);
@@ -13322,7 +13400,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
machine_mode mode = insn_p->operand[i + 1].mode;
arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
+ op = ix86_expand_unsigned_small_int_cst_argument (arg);
if (i == memory)
{
@@ -15466,7 +15544,7 @@ rdseed_step:
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
+ op3 = ix86_expand_unsigned_small_int_cst_argument (arg3);
op4 = expand_normal (arg4);
/* Note the arg order is different from the operand order. */
mode0 = insn_data[icode].operand[1].mode;
@@ -15681,7 +15759,7 @@ rdseed_step:
arg3 = CALL_EXPR_ARG (exp, 3);
arg4 = CALL_EXPR_ARG (exp, 4);
op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
+ op1 = ix86_expand_unsigned_small_int_cst_argument (arg1);
op2 = expand_normal (arg2);
op3 = expand_normal (arg3);
op4 = expand_normal (arg4);
@@ -16130,7 +16208,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
{
case VEC_BCAST_PXOR:
if ((mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONST0_RTX (mode));
return true;
@@ -16138,7 +16216,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
case VEC_BCAST_PCMPEQ:
if ((mode == V4SImode && !TARGET_SSE2)
|| (mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONSTM1_RTX (mode));
return true;
@@ -16158,7 +16236,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_absv32qi2 (tmp2, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16184,7 +16262,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16210,7 +16288,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16236,7 +16314,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16262,7 +16340,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16288,7 +16366,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16342,8 +16420,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
if (GET_MODE (reg) != innermode)
reg = gen_lowpart (innermode, reg);
SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
@@ -16659,7 +16736,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V32HFmode:
case E_V32BFmode:
- gcc_assert (TARGET_EVEX512);
if (TARGET_AVX512BW)
return ix86_vector_duplicate_value (mode, target, val);
else
@@ -16712,9 +16788,6 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
- if (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
- return false;
-
switch (mode)
{
case E_V2DImode:
@@ -18670,6 +18743,33 @@ emit_reduc_half (rtx dest, rtx src, int i)
case E_V8HFmode:
case E_V4SImode:
case E_V2DImode:
+ if (TARGET_SSE_REDUCTION_PREFER_PSHUF)
+ {
+ if (i == 128)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3));
+ break;
+ }
+ else if (i == 64)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ else if (i == 32)
+ {
+ d = gen_reg_rtx (V8HImode);
+ tem = gen_sse2_pshuflw_1 (
+ d, force_reg (V8HImode, gen_lowpart (V8HImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ }
d = gen_reg_rtx (V1TImode);
tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
GEN_INT (i / 2));
@@ -19256,8 +19356,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19368,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+     i.e. one Newton-Raphson step implemented with two FMAs.  */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
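For reference, a scalar model of what the two-FMA branch above computes (fmaf is the C99 fused multiply-add; x0 stands for the hardware rcpps estimate of 1/b):

#include <math.h>

/* Algebraically res == a * x0 * (2 - b * x0): one Newton-Raphson
   refinement of the reciprocal estimate, with the dividend A folded in
   early so only one plain multiply plus two FMAs are needed.  */
static float
swdiv_two_fma_sketch (float a, float b, float x0)
{
  float e0 = x0 * a;            /* e0 = a * rcp(b)                 */
  float e1 = fmaf (e0, b, -a);  /* e1 = e0 * b - a  (the residual) */
  return fmaf (-e1, x0, e0);    /* res = e0 - e1 * rcp(b)          */
}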
/* Output code to perform a Newton-Rhapson approximation of a
@@ -19356,7 +19476,7 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
unsigned vector_size = GET_MODE_SIZE (mode);
if (TARGET_FMA
- || (TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ || (TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
emit_insn (gen_rtx_SET (e2,
gen_rtx_FMA (mode, e0, x0, mthree)));
@@ -22018,8 +22138,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22355,8 +22474,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22464,8 +22582,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22475,8 +22592,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22590,8 +22706,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22601,8 +22716,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22796,8 +22910,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dfirst);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22805,8 +22918,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dsecond);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -24290,9 +24402,6 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
unsigned int i, nelt, which;
bool two_args;
- if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
- return false;
-
/* For HF and BF mode vector, convert it to HI using subreg. */
if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
{
@@ -24834,7 +24943,6 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
ix86_expand_vecop_qihi. */
if (!TARGET_AVX512BW
|| (qimode == V16QImode && !TARGET_AVX512VL)
- || (qimode == V32QImode && !TARGET_EVEX512)
/* There are no V64HImode instructions. */
|| qimode == V64QImode)
return false;
@@ -25303,7 +25411,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_AVX512DQ && TARGET_EVEX512 && mode == V8DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
@@ -26033,8 +26141,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
}
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
start_sequence ();
@@ -26045,8 +26152,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return res;
}
@@ -26089,8 +26195,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
target = gen_rtx_REG (cc_mode, FLAGS_REG);
dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
@@ -26121,8 +26226,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
}
@@ -26136,8 +26240,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x)
{
if (!TARGET_AVX512F
|| !CONST_VECTOR_P (x)
- || (!TARGET_AVX512VL
- && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512))
+ || (!TARGET_AVX512VL && GET_MODE_SIZE (mode) != 64)
|| !VALID_BCST_MODE_P (GET_MODE_INNER (mode))
/* Disallow HFmode broadcast. */
|| GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c35ac24..56ab7f2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
insns_conv = BITMAP_ALLOC (NULL);
queue = NULL;
- n_sse_to_integer = 0;
- n_integer_to_sse = 0;
-
+ cost_sse_integer = 0;
+  weighted_cost_sse_integer = 0;
max_visits = x86_stv_max_visits;
}
@@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def)
/* Record the def/insn pair so we can later efficiently iterate over
the defs to convert on insns not in the chain. */
bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
+ basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ int cost = 0;
+
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
{
if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
&& !reg_new)
return;
- n_integer_to_sse++;
+
+ /* Cost integer to sse moves. */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpinsrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (12);
}
else
{
if (!reg_new)
return;
- n_sse_to_integer++;
+
+ /* Cost sse to integer moves. */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpextrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (13);
}
+ if (speed_p)
+ weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
+
+ cost_sse_integer += cost;
+
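A compact sketch of the two running totals updated above (types simplified; in the patch bb_freq comes from bb->count.to_sreal_scale (entry_count)):

#include <stdbool.h>

struct conv_cost_totals
{
  double weighted;  /* frequency-weighted cost, drives the speed decision */
  int raw;          /* plain cost, used for -Os and as the tie breaker    */
};

static void
add_conv_cost (struct conv_cost_totals *t, double bb_freq, int cost,
	       bool speed_p)
{
  if (speed_p)
    t->weighted += bb_freq * cost;  /* weighted only when optimizing for speed */
  t->raw += cost;                   /* always accumulated                      */
}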
if (dump_file)
fprintf (dump_file,
" Mark r%d def in insn %d as requiring both modes in chain #%d\n",
@@ -518,26 +549,28 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
instead of using a scalar one. */
int
-general_scalar_chain::vector_const_cost (rtx exp)
+general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
{
gcc_assert (CONST_INT_P (exp));
if (standard_sse_constant_p (exp, vmode))
return ix86_cost->sse_op;
+ if (optimize_bb_for_size_p (bb))
+ return COSTS_N_BYTES (8);
/* We have separate costs for SImode and DImode, use SImode costs
for smaller modes. */
- return ix86_cost->sse_load[smode == DImode ? 1 : 0];
+ return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
}
-/* Compute a gain for chain conversion. */
+/* Return true if the chain conversion is profitable.  */
-int
+bool
general_scalar_chain::compute_convert_gain ()
{
bitmap_iterator bi;
unsigned insn_uid;
int gain = 0;
- int cost = 0;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -547,7 +580,7 @@ general_scalar_chain::compute_convert_gain ()
smaller modes than SImode the int load/store costs need to be
adjusted as well. */
unsigned sse_cost_idx = smode == DImode ? 1 : 0;
- unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
+ int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
{
@@ -555,26 +588,58 @@ general_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ basic_block bb = BLOCK_FOR_INSN (insn);
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
if (REG_P (src) && REG_P (dst))
- igain += 2 * m - ix86_cost->xmm_move;
+ {
+ if (!speed_p)
+ /* reg-reg move is 2 bytes, while SSE 3. */
+ igain += COSTS_N_BYTES (2 * m - 3);
+ else
+ /* Move costs are normalized to reg-reg move having cost 2. */
+ igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
+ }
else if (REG_P (src) && MEM_P (dst))
- igain
- += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
+ {
+ if (!speed_p)
+ /* Integer load/store is 3+ bytes and SSE 4+. */
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain
+ += COSTS_N_INSNS (m * ix86_cost->int_store[2]
+ - ix86_cost->sse_store[sse_cost_idx]) / 2;
+ }
else if (MEM_P (src) && REG_P (dst))
- igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
+ {
+ if (!speed_p)
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
+ }
else
{
/* For operations on memory operands, include the overhead
of explicit load and store instructions. */
if (MEM_P (dst))
- igain += optimize_insn_for_size_p ()
- ? -COSTS_N_BYTES (8)
- : (m * (ix86_cost->int_load[2]
- + ix86_cost->int_store[2])
- - (ix86_cost->sse_load[sse_cost_idx] +
- ix86_cost->sse_store[sse_cost_idx]));
+ {
+ if (!speed_p)
+		/* ??? This probably should account for the size difference
+		   between SSE and integer loads rather than the full SSE load.  */
+ igain -= COSTS_N_BYTES (8);
+ else
+ {
+ int cost = (m * (ix86_cost->int_load[2]
+ + ix86_cost->int_store[2])
+ - (ix86_cost->sse_load[sse_cost_idx] +
+ ix86_cost->sse_store[sse_cost_idx]));
+ igain += COSTS_N_INSNS (cost) / 2;
+ }
+ }
switch (GET_CODE (src))
{
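The repeated COSTS_N_INSNS (...) / 2 above converts entries of the ix86 cost tables, which are normalized so that a register-to-register move costs 2, onto the generic RTX cost scale.  A worked instance, assuming rtl.h's definition of COSTS_N_INSNS:

/* Assumption: COSTS_N_INSNS (n) expands to n * 4, as in rtl.h.  */
#define COSTS_N_INSNS(n) ((n) * 4)

/* A table entry of 2 (one reg-reg move) maps to a single insn:
   COSTS_N_INSNS (2) / 2 == 8 / 2 == 4 == COSTS_N_INSNS (1).  */
static inline int
table_cost_to_rtx_cost (int table_cost)
{
  return COSTS_N_INSNS (table_cost) / 2;
}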
@@ -595,7 +660,7 @@ general_scalar_chain::compute_convert_gain ()
igain += ix86_cost->shift_const - ix86_cost->sse_op;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
break;
case ROTATE:
@@ -631,16 +696,17 @@ general_scalar_chain::compute_convert_gain ()
igain += m * ix86_cost->add;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
if (CONST_INT_P (XEXP (src, 1)))
- igain -= vector_const_cost (XEXP (src, 1));
+ igain -= vector_const_cost (XEXP (src, 1), bb);
if (MEM_P (XEXP (src, 1)))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
else
- igain += m * ix86_cost->int_load[2]
- - ix86_cost->sse_load[sse_cost_idx];
+ igain += COSTS_N_INSNS
+ (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
}
break;
@@ -698,7 +764,7 @@ general_scalar_chain::compute_convert_gain ()
case CONST_INT:
if (REG_P (dst))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
/* xor (2 bytes) vs. xorps (3 bytes). */
if (src == const0_rtx)
@@ -722,14 +788,14 @@ general_scalar_chain::compute_convert_gain ()
/* DImode can be immediate for TARGET_64BIT
and SImode always. */
igain += m * COSTS_N_INSNS (1);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
}
else if (MEM_P (dst))
{
igain += (m * ix86_cost->int_store[2]
- ix86_cost->sse_store[sse_cost_idx]);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
break;
@@ -737,13 +803,14 @@ general_scalar_chain::compute_convert_gain ()
if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
{
// movd (4 bytes) replaced with movdqa (4 bytes).
- if (!optimize_insn_for_size_p ())
- igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+	      if (speed_p)
+ igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
+ - ix86_cost->xmm_move) / 2;
}
else
{
// pshufd; movd replaced with pshufd.
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain += COSTS_N_BYTES (4);
else
igain += ix86_cost->sse_to_integer;
@@ -755,55 +822,34 @@ general_scalar_chain::compute_convert_gain ()
}
}
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+ gain += igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+	  fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
-
- /* Cost the integer to sse and sse to integer moves. */
- if (!optimize_function_for_size_p (cfun))
- {
- cost += n_sse_to_integer * ix86_cost->sse_to_integer;
- /* ??? integer_to_sse but we only have that in the RA cost table.
- Assume sse_to_integer/integer_to_sse are the same which they
- are at the moment. */
- cost += n_integer_to_sse * ix86_cost->sse_to_integer;
- }
- else if (TARGET_64BIT || smode == SImode)
{
- cost += n_sse_to_integer * COSTS_N_BYTES (4);
- cost += n_integer_to_sse * COSTS_N_BYTES (4);
- }
- else if (TARGET_SSE4_1)
- {
- /* vmovd (4 bytes) + vpextrd (6 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (10);
- /* vmovd (4 bytes) + vpinsrd (6 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (10);
- }
- else
- {
- /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (13);
- /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (12);
+      fprintf (dump_file, "  Instruction conversion gain: %d\n",
+ gain);
+ fprintf (dump_file, " Registers conversion cost: %d\n",
+ cost_sse_integer);
+      fprintf (dump_file, "  Weighted instruction conversion gain: %.2f\n",
+ weighted_gain.to_double ());
+ fprintf (dump_file, " Weighted registers conversion cost: %.2f\n",
+ weighted_cost_sse_integer.to_double ());
}
- if (dump_file)
- fprintf (dump_file, " Registers conversion cost: %d\n", cost);
-
- gain -= cost;
-
- if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
-
- return gain;
+ if (weighted_gain != weighted_cost_sse_integer)
+ return weighted_gain > weighted_cost_sse_integer;
+ else
+    return gain > cost_sse_integer;
}
/* Insert generated conversion instruction sequence INSNS
@@ -902,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
else
emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
gen_gpr_to_xmm_move_src (vmode, reg)));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -970,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
else
emit_move_insn (dst, src);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -1066,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
start_sequence ();
vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
}
@@ -1508,33 +1551,34 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
with numerous special cases. */
static int
-timode_immed_const_gain (rtx cst)
+timode_immed_const_gain (rtx cst, basic_block bb)
{
/* movabsq vs. movabsq+vmovq+vunpacklqdq. */
if (CONST_WIDE_INT_P (cst)
&& CONST_WIDE_INT_NUNITS (cst) == 2
&& CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
- return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+ return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
: -COSTS_N_INSNS (2);
/* 2x movabsq ~ vmovdqa. */
return 0;
}
-/* Compute a gain for chain conversion. */
+/* Return true if the chain conversion is profitable.  */
-int
+bool
timode_scalar_chain::compute_convert_gain ()
{
/* Assume that if we have to move TImode values between units,
then transforming this chain isn't worth it. */
- if (n_sse_to_integer || n_integer_to_sse)
- return -1;
+ if (cost_sse_integer)
+ return false;
bitmap_iterator bi;
unsigned insn_uid;
/* Split ties to prefer V1TImode when not optimizing for size. */
int gain = optimize_size ? 0 : 1;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -1546,34 +1590,36 @@ timode_scalar_chain::compute_convert_gain ()
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
HOST_WIDE_INT op1val;
+ basic_block bb = BLOCK_FOR_INSN (insn);
int scost, vcost;
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
switch (GET_CODE (src))
{
case REG:
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
else
igain = COSTS_N_INSNS (1);
break;
case MEM:
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
break;
case CONST_INT:
if (MEM_P (dst)
&& standard_sse_constant_p (src, V1TImode))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ igain = !speed_p ? COSTS_N_BYTES (11) : 1;
break;
case CONST_WIDE_INT:
/* 2 x mov vs. vmovdqa. */
if (MEM_P (dst))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
break;
case NOT:
@@ -1587,14 +1633,14 @@ timode_scalar_chain::compute_convert_gain ()
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
if (CONST_SCALAR_INT_P (XEXP (src, 1)))
- igain += timode_immed_const_gain (XEXP (src, 1));
+ igain += timode_immed_const_gain (XEXP (src, 1), bb);
break;
case ASHIFT:
case LSHIFTRT:
/* See ix86_expand_v1ti_shift. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 65)
scost = COSTS_N_BYTES (5);
@@ -1628,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFTRT:
/* See ix86_expand_v1ti_ashiftrt. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 127)
scost = COSTS_N_BYTES (7);
@@ -1706,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain ()
case ROTATERT:
/* See ix86_expand_v1ti_rotate. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
scost = COSTS_N_BYTES (13);
if ((op1val & 31) == 0)
@@ -1738,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain ()
{
if (GET_CODE (XEXP (src, 0)) == AND)
/* and;and;or (9 bytes) vs. ptest (5 bytes). */
- igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (2);
+ igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
/* or (3 bytes) vs. ptest (5 bytes). */
- else if (optimize_insn_for_size_p ())
+ else if (!speed_p)
igain = -COSTS_N_BYTES (2);
}
else if (XEXP (src, 1) == const1_rtx)
/* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
- igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6)
- : -COSTS_N_INSNS (1);
+ igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
break;
default:
break;
}
+ gain += igain;
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+ fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
+ fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n",
+ gain, weighted_gain.to_double ());
- return gain;
+ if (weighted_gain > (sreal) 0)
+ return true;
+ else
+ return gain > 0;
}
/* Fix uses of converted REG in debug insns. */
@@ -1874,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
src = validize_mem (force_const_mem (V1TImode, src));
use_move = MEM_P (dst);
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
if (use_move)
@@ -2561,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p)
conversions. */
if (chain->build (&candidates[i], uid, disallowed))
{
- if (chain->compute_convert_gain () > 0)
+ if (chain->compute_convert_gain ())
converted_insns += chain->convert ();
else if (dump_file)
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
@@ -3034,6 +3085,39 @@ ix86_rpad_gate ()
&& optimize_function_for_speed_p (cfun));
}
+/* Generate a vector set, DEST = SRC, at the entry of the nearest common
+   dominator of the basic blocks in bitmap BBS, hoisted if necessary out of
+   any loop into the fake loop that contains the whole function, so that
+   only a single vector set is emitted for the whole function.  */
+
+static void
+ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx set = gen_rtx_SET (dest, src);
+
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ if (insn == BB_HEAD (bb))
+ emit_insn_before (set, insn);
+ else
+ emit_insn_after (set, insn ? PREV_INSN (insn) : BB_END (bb));
+}
+
/* At entry of the nearest common dominator for basic blocks with
conversions/rcp/sqrt/rsqrt/round, generate a single
vxorps %xmmN, %xmmN, %xmmN
@@ -3155,7 +3239,6 @@ remove_partial_avx_dependency (void)
/* Generate an XMM vector SET. */
set = gen_rtx_SET (vec, src);
set_insn = emit_insn_before (set, insn);
- df_insn_rescan (set_insn);
if (cfun->can_throw_non_call_exceptions)
{
@@ -3188,35 +3271,10 @@ remove_partial_avx_dependency (void)
calculate_dominance_info (CDI_DOMINATORS);
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
- /* Generate a vxorps at entry of the nearest dominator for basic
- blocks with conversions, which is in the fake loop that
- contains the whole function, so that there is only a single
- vxorps in the whole function. */
- bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
- convert_bbs);
- while (bb->loop_father->latch
- != EXIT_BLOCK_PTR_FOR_FN (cfun))
- bb = get_immediate_dominator (CDI_DOMINATORS,
- bb->loop_father->header);
-
- set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
-
- insn = BB_HEAD (bb);
- while (insn && !NONDEBUG_INSN_P (insn))
- {
- if (insn == BB_END (bb))
- {
- insn = NULL;
- break;
- }
- insn = NEXT_INSN (insn);
- }
- if (insn == BB_HEAD (bb))
- set_insn = emit_insn_before (set, insn);
- else
- set_insn = emit_insn_after (set,
- insn ? PREV_INSN (insn) : BB_END (bb));
- df_insn_rescan (set_insn);
+ ix86_place_single_vector_set (v4sf_const0,
+ CONST0_RTX (V4SFmode),
+ convert_bbs);
+
loop_optimizer_finalize ();
if (!control_flow_insns.is_empty ())
@@ -3288,6 +3346,270 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
return new pass_remove_partial_avx_dependency (ctxt);
}
+/* Return a QI vector mode of SIZE bytes.  */
+
+static machine_mode
+ix86_get_vector_load_mode (unsigned int size)
+{
+ machine_mode mode;
+ if (size == 64)
+ mode = V64QImode;
+ else if (size == 32)
+ mode = V32QImode;
+ else if (size == 16)
+ mode = V16QImode;
+ else if (size == 8)
+ mode = V8QImode;
+ else if (size == 4)
+ mode = V4QImode;
+ else if (size == 2)
+ mode = V2QImode;
+ else
+ gcc_unreachable ();
+ return mode;
+}
+
+/* Replace the source operand of instructions in VECTOR_INSNS with
+ VECTOR_CONST in VECTOR_MODE. */
+
+static void
+replace_vector_const (machine_mode vector_mode, rtx vector_const,
+ auto_bitmap &vector_insns)
+{
+ bitmap_iterator bi;
+ unsigned int id;
+
+ EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* Get the single SET instruction. */
+ rtx set = single_set (insn);
+ rtx src = SET_SRC (set);
+ machine_mode mode = GET_MODE (src);
+
+ rtx replace;
+ /* Replace the source operand with VECTOR_CONST. */
+ if (SUBREG_P (src) || mode == vector_mode)
+ replace = vector_const;
+ else
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+ {
+ /* If the mode size is smaller than its natural size,
+ first insert an extra move with a QI vector SUBREG
+ of the same size to avoid validate_subreg failure. */
+ machine_mode vmode = ix86_get_vector_load_mode (size);
+ rtx vreg;
+ if (mode == vmode)
+ vreg = vector_const;
+ else
+ {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ emit_insn_before (pat, insn);
+ }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+ }
+ else
+ replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ }
+
+ SET_SRC (set) = replace;
+ /* Drop possible dead definitions. */
+ PATTERN (insn) = set;
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
+ df_insn_rescan (insn);
+ }
+}
+
+/* At entry of the nearest common dominator for basic blocks with vector
+ CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest
+ vector set instruction for all CONST0_RTX and integer CONSTM1_RTX
+ uses.
+
+ NB: We want to generate only a single widest vector set to cover the
+ whole function. The LCM algorithm isn't appropriate here since it
+   may place a vector set inside a loop.  */
+
+static unsigned int
+remove_redundant_vector_load (void)
+{
+ timevar_push (TV_MACH_DEP);
+
+ auto_bitmap zero_bbs;
+ auto_bitmap m1_bbs;
+ auto_bitmap zero_insns;
+ auto_bitmap m1_insns;
+
+ basic_block bb;
+ rtx_insn *insn;
+ unsigned HOST_WIDE_INT zero_count = 0;
+ unsigned HOST_WIDE_INT m1_count = 0;
+ unsigned int zero_size = 0;
+ unsigned int m1_size = 0;
+
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+
+ /* Record single set vector instruction with CONST0_RTX and
+ CONSTM1_RTX source. Record basic blocks with CONST0_RTX and
+ CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the
+ maximum size of CONST0_RTX and CONSTM1_RTX. */
+
+ rtx dest = SET_DEST (set);
+ machine_mode mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ continue;
+
+ rtx src = SET_SRC (set);
+ /* Skip non-vector load instruction. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ continue;
+
+ if (src == CONST0_RTX (mode))
+ {
+ /* Record vector instruction with CONST0_RTX. */
+ bitmap_set_bit (zero_insns, INSN_UID (insn));
+
+ /* Record the maximum vector size. */
+ if (zero_size < GET_MODE_SIZE (mode))
+ zero_size = GET_MODE_SIZE (mode);
+
+ /* Record the basic block with CONST0_RTX. */
+ bitmap_set_bit (zero_bbs, bb->index);
+ zero_count++;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && src == CONSTM1_RTX (mode))
+ {
+ /* Record vector instruction with CONSTM1_RTX. */
+ bitmap_set_bit (m1_insns, INSN_UID (insn));
+
+ /* Record the maximum vector size. */
+ if (m1_size < GET_MODE_SIZE (mode))
+ m1_size = GET_MODE_SIZE (mode);
+
+ /* Record the basic block with CONSTM1_RTX. */
+ bitmap_set_bit (m1_bbs, bb->index);
+ m1_count++;
+ }
+ }
+ }
+
+ if (zero_count > 1 || m1_count > 1)
+ {
+ machine_mode zero_mode, m1_mode;
+ rtx vector_const0, vector_constm1;
+
+ if (zero_count > 1)
+ {
+ zero_mode = ix86_get_vector_load_mode (zero_size);
+ vector_const0 = gen_reg_rtx (zero_mode);
+ replace_vector_const (zero_mode, vector_const0, zero_insns);
+ }
+ else
+ {
+ zero_mode = VOIDmode;
+ vector_const0 = nullptr;
+ }
+
+ if (m1_count > 1)
+ {
+ m1_mode = ix86_get_vector_load_mode (m1_size);
+ vector_constm1 = gen_reg_rtx (m1_mode);
+ replace_vector_const (m1_mode, vector_constm1, m1_insns);
+ }
+ else
+ {
+ m1_mode = VOIDmode;
+ vector_constm1 = nullptr;
+ }
+
+ /* (Re-)discover loops so that bb->loop_father can be used in the
+ analysis below. */
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+ if (vector_const0)
+ ix86_place_single_vector_set (vector_const0,
+ CONST0_RTX (zero_mode),
+ zero_bbs);
+
+ if (vector_constm1)
+ ix86_place_single_vector_set (vector_constm1,
+ CONSTM1_RTX (m1_mode),
+ m1_bbs);
+
+ loop_optimizer_finalize ();
+
+ df_process_deferred_rescans ();
+ }
+
+ df_clear_flags (DF_DEFER_INSN_RESCAN);
+
+ timevar_pop (TV_MACH_DEP);
+ return 0;
+}
+
+namespace {
+
+const pass_data pass_data_remove_redundant_vector_load =
+{
+ RTL_PASS, /* type */
+ "rrvl", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_remove_redundant_vector_load : public rtl_opt_pass
+{
+public:
+ pass_remove_redundant_vector_load (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return remove_redundant_vector_load ();
+ }
+}; // class pass_remove_redundant_vector_load
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_remove_redundant_vector_load (gcc::context *ctxt)
+{
+ return new pass_remove_redundant_vector_load (ctxt);
+}
+
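A hypothetical C-level illustration of the redundancy the new rrvl pass targets (whether two separate zeroing instructions really survive to this point depends on the preceding passes):

#include <immintrin.h>

/* Each branch materializes a vector zero of a different width.  When both
   zero-setting insns reach the pass, one widest (here 256-bit) register is
   zeroed once at the common dominator and the 128-bit use is rewritten as
   a subreg of it, as done by replace_vector_const above.  */
void
store_zeros (__m256i *p, __m128i *q, int c)
{
  if (c)
    *p = _mm256_setzero_si256 ();
  else
    *q = _mm_setzero_si128 ();
}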
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
instructions when there are no flag set between a flag
producer and user. */
@@ -3962,7 +4284,6 @@ ix86_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -3979,37 +4300,16 @@ ix86_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- if (is_function_default_version
- (default_version_info->this_node->decl))
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev != NULL)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
if (targetm.has_ifunc_p ())
{
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 24b0c4e..e3719b3 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -153,12 +153,13 @@ class scalar_chain
bitmap insns_conv;
hash_map<rtx, rtx> defs_map;
- unsigned n_sse_to_integer;
- unsigned n_integer_to_sse;
+ /* Cost of inserted conversions between integer and SSE registers. */
+ int cost_sse_integer;
+ sreal weighted_cost_sse_integer;
auto_vec<rtx_insn *> control_flow_insns;
bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
- virtual int compute_convert_gain () = 0;
+ virtual bool compute_convert_gain () = 0;
int convert ();
protected:
@@ -184,11 +185,11 @@ class general_scalar_chain : public scalar_chain
public:
general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
: scalar_chain (smode_, vmode_) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void convert_insn (rtx_insn *insn) final override;
- int vector_const_cost (rtx exp);
+ int vector_const_cost (rtx exp, basic_block bb);
rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn);
};
@@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain
{
public:
timode_scalar_chain () : scalar_chain (TImode, V1TImode) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void fix_debug_reg_uses (rtx reg);
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index 19d78d7..6fa601d 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -118,8 +118,6 @@ DEF_PTA(SHA512)
DEF_PTA(SM4)
DEF_PTA(APX_F)
DEF_PTA(USER_MSR)
-DEF_PTA(EVEX512)
-DEF_PTA(AVX10_1_256)
DEF_PTA(AVX10_1)
DEF_PTA(AVX10_2)
DEF_PTA(AMX_AVX512)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index a9fac01..1243867 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -259,9 +259,7 @@ static struct ix86_target_opts isa2_opts[] =
{ "-msm3", OPTION_MASK_ISA2_SM3 },
{ "-msha512", OPTION_MASK_ISA2_SHA512 },
{ "-msm4", OPTION_MASK_ISA2_SM4 },
- { "-mevex512", OPTION_MASK_ISA2_EVEX512 },
{ "-musermsr", OPTION_MASK_ISA2_USER_MSR },
- { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 },
{ "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 },
{ "-mavx10.2", OPTION_MASK_ISA2_AVX10_2 },
{ "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 },
@@ -713,8 +711,6 @@ ix86_function_specific_save (struct cl_target_option *ptr,
ptr->x_ix86_apx_features = opts->x_ix86_apx_features;
ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
- ptr->x_ix86_no_avx512_explicit = opts->x_ix86_no_avx512_explicit;
- ptr->x_ix86_no_avx10_1_explicit = opts->x_ix86_no_avx10_1_explicit;
ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
@@ -858,8 +854,6 @@ ix86_function_specific_restore (struct gcc_options *opts,
opts->x_ix86_apx_features = ptr->x_ix86_apx_features;
opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
- opts->x_ix86_no_avx512_explicit = ptr->x_ix86_no_avx512_explicit;
- opts->x_ix86_no_avx10_1_explicit = ptr->x_ix86_no_avx10_1_explicit;
opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
@@ -1131,11 +1125,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("sha512", OPT_msha512),
IX86_ATTR_ISA ("sm4", OPT_msm4),
IX86_ATTR_ISA ("apxf", OPT_mapxf),
- IX86_ATTR_ISA ("evex512", OPT_mevex512),
IX86_ATTR_ISA ("usermsr", OPT_musermsr),
- IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256),
IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1),
- IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1),
IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2),
IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512),
IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32),
@@ -1271,13 +1262,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
}
}
- /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */
- if (opt == OPT_msse4 && !opt_set_p)
- {
- opt = OPT_mno_sse4;
- opt_set_p = true;
- }
-
/* Process the option. */
if (opt == N_OPTS)
{
@@ -1436,18 +1420,6 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
target_clone_attr))
return error_mark_node;
- /* AVX10.1-256 will enable only 256 bit AVX512F features by setting all
- AVX512 related ISA flags and not setting EVEX512. When it is used
- with avx512 related function attribute, we need to enable 512 bit to
- align with the command line behavior. Manually set EVEX512 for this
- scenario. */
- if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
/* If the changed options are different from the default, rerun
ix86_option_override_internal, and then save the options away.
The string options are attribute options, and will be undone
@@ -1458,10 +1430,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
|| option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|| option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
|| enum_opts_set.x_ix86_fpmath
- || enum_opts_set.x_prefer_vector_width_type
- || (!(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags2_explicit
- & OPTION_MASK_ISA2_AVX10_1_256)))
+ || enum_opts_set.x_prefer_vector_width_type)
{
/* If we are using the default tune= or arch=, undo the string assigned,
and use the default. */
@@ -2025,7 +1994,7 @@ ix86_option_override_internal (bool main_args_p,
struct gcc_options *opts_set)
{
unsigned int i;
- unsigned HOST_WIDE_INT ix86_arch_mask, avx512_isa_flags, avx512_isa_flags2;
+ unsigned HOST_WIDE_INT ix86_arch_mask;
const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
/* -mrecip options. */
@@ -2044,15 +2013,6 @@ ix86_option_override_internal (bool main_args_p,
{ "vec-sqrt", RECIP_MASK_VEC_SQRT },
};
- avx512_isa_flags = OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
- | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
- | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
- | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
- | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
- | OPTION_MASK_ISA_AVX512BITALG;
- avx512_isa_flags2 = OPTION_MASK_ISA2_AVX512FP16
- | OPTION_MASK_ISA2_AVX512BF16;
-
/* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
@@ -2674,107 +2634,6 @@ ix86_option_override_internal (bool main_args_p,
&= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
& ~opts->x_ix86_isa_flags_explicit);
- /* Emit a warning if AVX10.1 options is used with AVX512/EVEX512 options except
- for the following option combinations:
- 1. Both AVX10.1-512 and AVX512 with 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 2. Both AVX10.1-256 and AVX512 w/o 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 3. Both AVX10.1 and AVX512 are disabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2))
- {
- if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2)
- & ((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-evex512%> or %<-mno-avx512XXX%> cannot disable "
- "AVX10 instructions when AVX10.1-512 is available in GCC 15, "
- "behavior will change to it will disable that part of "
- "AVX512 instructions since GCC 16");
- }
- else if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- if (TARGET_EVEX512_P (opts->x_ix86_isa_flags2)
- && (OPTION_MASK_ISA2_EVEX512 & opts->x_ix86_isa_flags2_explicit))
- {
- if (!TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- || !(OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- /* We should not emit 512 bit instructions under AVX10.1-256
- when EVEX512 is enabled w/o any AVX512 features enabled.
- Disable EVEX512 bit for this. */
- warning (0, "Using %<-mevex512%> without any AVX512 features "
- "enabled together with AVX10.1 only will not enable "
- "any AVX512 or AVX10.1-512 features, using 256 as "
- "max vector size");
- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_EVEX512;
- }
- else
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, "
- "using 512 as max vector size");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(OPTION_MASK_ISA2_EVEX512
- & opts->x_ix86_isa_flags2_explicit))
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, using "
- "512 as max vector size");
- else if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~(avx512_isa_flags2 & opts->x_ix86_isa_flags2)
- & (avx512_isa_flags2
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-avx512XXX%> cannot disable AVX10 instructions "
- "when AVX10 is available in GCC 15, behavior will change "
- "to it will disable that part of AVX512 instructions since "
- "GCC 16");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- if (opts->x_ix86_no_avx10_1_explicit
- && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1)
- & opts->x_ix86_isa_flags2_explicit))
- {
- warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> cannot disable "
- "AVX512 instructions when %<-mavx512XXX%> in GCC 15, "
- "behavior will change to it will disable all the "
- "instructions in GCC 16");
- /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is
- disabled. */
- if (OPTION_MASK_ISA2_AVX10_1_256 & opts->x_ix86_isa_flags2_explicit)
- {
- opts->x_ix86_isa_flags = (~avx512_isa_flags
- & opts->x_ix86_isa_flags)
- | (avx512_isa_flags & opts->x_ix86_isa_flags
- & opts->x_ix86_isa_flags_explicit);
- opts->x_ix86_isa_flags2 = (~avx512_isa_flags2
- & opts->x_ix86_isa_flags2)
- | (avx512_isa_flags2 & opts->x_ix86_isa_flags2
- & opts->x_ix86_isa_flags2_explicit);
- }
- }
- }
-
- /* Set EVEX512 if one of the following conditions meets:
- 1. AVX512 is enabled while EVEX512 is not explicitly set/unset.
- 2. AVX10.1-512 is enabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)
- || (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
- /* Enable all AVX512 related ISAs when AVX10.1 is enabled. */
- if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- opts->x_ix86_isa_flags |= avx512_isa_flags;
- opts->x_ix86_isa_flags2 |= avx512_isa_flags2;
- }
-
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
@@ -2828,8 +2687,8 @@ ix86_option_override_internal (bool main_args_p,
if (flag_nop_mcount)
error ("%<-mnop-mcount%> is not compatible with this target");
#endif
- if (flag_nop_mcount && flag_pic)
- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>");
+ if (flag_nop_mcount && flag_pic && !flag_plt)
+ error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>");
/* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM_P (opts->x_target_flags)
@@ -3049,8 +2908,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_move_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_move_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3076,8 +2934,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_store_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_store_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_store_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3374,13 +3231,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
case 'e':
if (TARGET_PREFER_AVX256)
{
- if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512,prefer-vector-width=512";
+ if (!TARGET_AVX512F)
+ str = "avx512f,prefer-vector-width=512";
else
str = "prefer-vector-width=512";
}
- else if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512";
+ else if (!TARGET_AVX512F)
+ str = "avx512f";
break;
default:
gcc_unreachable ();
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 39f8bc6..06f0288 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,5 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bea3fd4..86194b3 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -186,6 +186,7 @@ extern void ix86_expand_v2di_ashiftrt (rtx[]);
extern rtx ix86_replace_reg_with_reg (rtx, rtx, rtx);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
+extern bool ix86_check_movs (rtx, int);
extern bool ix86_check_no_addr_space (rtx);
extern void ix86_split_idivmod (machine_mode, rtx[], bool);
extern bool ix86_hardreg_mov_ok (rtx, rtx);
@@ -198,6 +199,7 @@ extern int ix86_attr_length_vex_default (rtx_insn *, bool, bool);
extern rtx ix86_libcall_value (machine_mode);
extern bool ix86_function_arg_regno_p (int);
extern void ix86_asm_output_function_label (FILE *, const char *, tree);
+extern void ix86_asm_output_labelref (FILE *, const char *, const char *);
extern void ix86_call_abi_override (const_tree);
extern int ix86_reg_parm_stack_space (const_tree);
@@ -427,12 +429,21 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
+extern rtl_opt_pass *make_pass_remove_redundant_vector_load
+ (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
extern bool ix86_has_no_direct_extern_access;
extern bool ix86_rpad_gate ();
+extern sbitmap ix86_get_separate_components (void);
+extern sbitmap ix86_components_for_bb (basic_block);
+extern void ix86_disqualify_components (sbitmap, edge, sbitmap, bool);
+extern void ix86_emit_prologue_components (sbitmap);
+extern void ix86_emit_epilogue_components (sbitmap);
+extern void ix86_set_handled_components (sbitmap);
+
/* In i386-expand.cc. */
bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
HOST_WIDE_INT*);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4f8380c4..7785329 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -458,6 +459,9 @@ int ix86_arch_specified;
indirect thunk pushes the return address onto stack, destroying
red-zone.
+ NB: Don't use the red zone for functions with no_caller_saved_registers
+ when APX provides 32 GPRs, since a 128-byte red zone is too small to
+ save 31 GPRs.
+
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
indirect thunk. */
@@ -467,6 +471,9 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
+ && (!TARGET_APX_EGPR
+ || (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
|| cfun->machine->indirect_branch_type == indirect_branch_keep));
}
@@ -1709,6 +1716,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
}
}
+/* Output a user-defined label. In AT&T syntax, registers are prefixed
+ with %, so labels require no punctuation. In Intel syntax, registers
+ are unprefixed, so labels may clash with registers or other operators,
+ and require quoting. */
+void
+ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
+{
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, "%s%s", prefix, label);
+ else
+ fprintf (file, "\"%s%s\"", prefix, label);
+}
+
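
As a rough illustration of why the quoting matters (assembler spellings are approximate, not taken from GCC output): a C symbol that collides with a register name is unambiguous in AT&T syntax but needs quotes in Intel syntax.

    int fs;    /* global whose name matches a segment register */

    /* AT&T:   movl  fs(%rip), %eax           -- '%' marks registers, so 'fs' is a label
       Intel:  mov   eax, DWORD PTR "fs"[rip] -- quoting keeps 'fs' from parsing as a register */
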
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also
ix86_conditional_register_usage for more details. */
@@ -1788,8 +1808,7 @@ ix86_init_pic_reg (void)
add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1991,8 +2010,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
{
- if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
- && !TARGET_IAMCU)
+ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
{
static bool warnedavx512f;
static bool warnedavx512f_ret;
@@ -4415,7 +4433,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* AVX512F values are returned in ZMM0 if available. */
if (size == 64)
- return !TARGET_AVX512F || !TARGET_EVEX512;
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -4732,8 +4750,7 @@ ix86_va_start (tree valist, rtx nextarg)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -5173,6 +5190,27 @@ ix86_check_movabs (rtx insn, int opnum)
return volatile_ok || !MEM_VOLATILE_P (mem);
}
+/* Return true if element IDX of the PARALLEL in INSN satisfies the
+ address-space constraints of the MOVS instruction. */
+bool
+ix86_check_movs (rtx insn, int idx)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ rtx set = XVECEXP (pat, 0, idx);
+ gcc_assert (GET_CODE (set) == SET);
+
+ rtx dst = SET_DEST (set);
+ gcc_assert (MEM_P (dst));
+
+ rtx src = SET_SRC (set);
+ gcc_assert (MEM_P (src));
+
+ return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
+ || Pmode == word_mode));
+}
+
/* Return false if INSN contains a MEM with a non-default address space. */
bool
ix86_check_no_addr_space (rtx insn)
@@ -5349,7 +5387,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
switch (GET_MODE_SIZE (mode))
{
case 64:
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
return 2;
break;
case 32:
@@ -5402,10 +5440,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
return "vpxor\t%x0, %x0, %x0";
@@ -5421,19 +5457,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorpd\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorpd\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorpd\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxorq\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxorq\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxorq\t%g0, %g0, %g0";
}
}
return "vxorpd\t%x0, %x0, %x0";
@@ -5450,19 +5482,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorps\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorps\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorps\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
}
return "vxorps\t%x0, %x0, %x0";
@@ -5483,7 +5511,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
case MODE_XI:
case MODE_V8DF:
case MODE_V16SF:
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
case MODE_OI:
@@ -5499,10 +5527,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
- else if (TARGET_EVEX512)
- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
else
- gcc_unreachable ();
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
}
return (TARGET_AVX
? "vpcmpeqd\t%0, %0, %0"
@@ -5516,7 +5542,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (GET_MODE_SIZE (mode) == 64)
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%t0, %t0, %t0";
}
else if (GET_MODE_SIZE (mode) == 32)
@@ -5528,7 +5554,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
}
else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%x0, %x0, %x0";
}
@@ -5639,8 +5665,6 @@ ix86_get_ssemov (rtx *operands, unsigned size,
|| memory_operand (operands[1], mode))
gcc_unreachable ();
size = 64;
- /* We need TARGET_EVEX512 to move into zmm register. */
- gcc_assert (TARGET_EVEX512);
switch (type)
{
case opcode_int:
@@ -6881,6 +6905,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
&& (nregs + aligned) >= 3;
}
+/* Check if push/pop should be used to save/restore registers. */
+static bool
+save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
+{
+ return ((!to_allocate && cfun->machine->frame.nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ /* If stack clash probing needs a loop, then it needs a
+ scratch register. But the returned register is only guaranteed
+ to be safe to use after register saves are complete. So if
+ stack clash protections are enabled and the allocated frame is
+ larger than the probe interval, then use pushes to save
+ callee saved registers. */
+ || (flag_stack_clash_protection
+ && !ix86_target_stack_probe ()
+ && to_allocate > get_probe_interval ()));
+}
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
@@ -7165,20 +7209,7 @@ ix86_compute_frame_layout (void)
/* Size prologue needs to allocate. */
to_allocate = offset - frame->sse_reg_save_offset;
- if ((!to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
- /* If static stack checking is enabled and done with probes,
- the registers need to be saved before allocating the frame. */
- || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- /* If stack clash probing needs a loop, then it needs a
- scratch register. But the returned register is only guaranteed
- to be safe to use after register saves are complete. So if
- stack clash protections are enabled and the allocated frame is
- larger than the probe interval, then use pushes to save
- callee saved registers. */
- || (flag_stack_clash_protection
- && !ix86_target_stack_probe ()
- && to_allocate > get_probe_interval ()))
+ if (save_regs_using_push_pop (to_allocate))
frame->save_regs_using_mov = false;
if (ix86_using_red_zone ()
@@ -7636,7 +7667,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ /* Skip registers already handled by separate shrink-wrapping. */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -7729,8 +7762,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
add_frame_related_expr = true;
}
- insn = emit_insn (gen_pro_epilogue_adjust_stack_add
- (Pmode, dest, src, addend));
+ /* Separate shrink-wrapping may insert prologue code between a TEST and
+ its JMP. To avoid touching EFLAGS, emit the stack adjustment without
+ a flags clobber. */
+ if (crtl->shrink_wrapped_separate)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
+ (Pmode, dest, src, addend));
+ else
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add
+ (Pmode, dest, src, addend));
+
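
Roughly the situation being avoided (hypothetical assembly; the _nocc pattern is assumed to expand to a flags-preserving form such as lea):

    testl  %edi, %edi
    subq   $24, %rsp        # a prologue chunk inserted here would clobber EFLAGS
    jne    .L3
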
if (style >= 0)
ix86_add_queued_cfa_restore_notes (insn);
@@ -7914,6 +7954,15 @@ ix86_update_stack_boundary (void)
if (ix86_tls_descriptor_calls_expanded_in_cfun
&& crtl->preferred_stack_boundary < 128)
crtl->preferred_stack_boundary = 128;
+
+ /* For the 32-bit MS ABI, both the incoming and preferred stack boundaries
+ are 32 bits, but if force_align_arg_pointer is specified, prefer
+ 128 bits for backward compatibility, as the documentation also
+ suggests. */
+ if (lookup_attribute ("force_align_arg_pointer",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
+ && crtl->preferred_stack_boundary < 128)
+ crtl->preferred_stack_boundary = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
@@ -7944,8 +7993,7 @@ ix86_get_drap_rtx (void)
start_sequence ();
drap_vreg = copy_to_reg (arg_ptr);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
@@ -8466,6 +8514,128 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+ /* This insn may reference a stack slot. Update the maximum stack slot
+ alignment if the memory is addressed through the stack access register. */
+ stack_access_data *p = (stack_access_data *) data;
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ {
+ auto op = *iter;
+ if (MEM_P (op))
+ {
+ if (reg_mentioned_p (p->reg, XEXP (op, 0)))
+ {
+ unsigned int alignment = MEM_ALIGN (op);
+
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
+ break;
+ }
+ else
+ iter.skip_subrtxes ();
+ }
+ }
+}
+
+/* Helper function for ix86_find_all_reg_uses. */
+
+static void
+ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
+ rtx set, unsigned int regno,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+
+ if (!REG_P (dest))
+ return;
+
+ /* Reject non-Pmode modes. */
+ if (GET_MODE (dest) != Pmode)
+ return;
+
+ unsigned int dst_regno = REGNO (dest);
+
+ if (TEST_HARD_REG_BIT (regset, dst_regno))
+ return;
+
+ const_rtx src = SET_SRC (set);
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, src, ALL)
+ {
+ auto op = *iter;
+
+ if (MEM_P (op))
+ iter.skip_subrtxes ();
+
+ if (REG_P (op) && REGNO (op) == regno)
+ {
+ /* Add this register to register set. */
+ add_to_hard_reg_set (&regset, Pmode, dst_regno);
+ bitmap_set_bit (worklist, dst_regno);
+ break;
+ }
+ }
+}
+
+/* Find all registers whose definitions use register REGNO. */
+
+static void
+ix86_find_all_reg_uses (HARD_REG_SET &regset,
+ unsigned int regno, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ unsigned int ref_regno = DF_REF_REGNO (ref);
+
+ rtx set = single_set (insn);
+ if (set)
+ {
+ ix86_find_all_reg_uses_1 (regset, set,
+ ref_regno, worklist);
+ continue;
+ }
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_uses_1 (regset, exp,
+ ref_regno, worklist);
+ }
+ }
+}
+
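
For example (hypothetical RTL), a stack slot may be addressed through a register derived from the stack pointer, so alignment has to be collected from uses of that derived register as well:

    (set (reg:DI r11) (plus:DI (reg:DI sp) (const_int 32)))
    (set (mem:V4SF (reg:DI r11) [0 S16 A128]) (reg:V4SF xmm0))
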
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8484,10 +8654,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8499,27 +8665,67 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+ /* Stop if we don't need to check stack slot. */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+ /* A stack slot can be accessed via the stack pointer, the frame pointer,
+ or registers derived from either of them. */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int regno;
+
+ do
+ {
+ regno = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ stack_access_data data;
+
+ data.stack_alignment = &stack_alignment;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
+ }
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
@@ -9029,11 +9235,22 @@ ix86_expand_prologue (void)
doing this if we have to probe the stack; at least on x86_64 the
stack probe can turn into a call that clobbers a red zone location. */
else if (ix86_using_red_zone ()
- && (! TARGET_STACK_PROBE
- || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
{
+ HOST_WIDE_INT allocate_offset;
+ if (crtl->shrink_wrapped_separate)
+ {
+ allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
+
+ /* Adjust the total offset at the beginning of the function. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (allocate_offset), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
+ }
+
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- cfun->machine->red_zone_used = true;
int_registers_saved = true;
}
}
@@ -9226,6 +9443,8 @@ ix86_expand_prologue (void)
}
else
{
+ gcc_assert (!crtl->shrink_wrapped_separate);
+
rtx eax = gen_rtx_REG (Pmode, AX_REG);
rtx r10 = NULL;
const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
@@ -9611,30 +9830,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
{
- rtx reg = gen_rtx_REG (word_mode, regno);
- rtx mem;
- rtx_insn *insn;
- mem = choose_baseaddr (cfa_offset, NULL);
- mem = gen_frame_mem (word_mode, mem);
- insn = emit_move_insn (reg, mem);
-
- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ /* Skip registers already handled by separate shrink-wrapping. */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
{
- /* Previously we'd represented the CFA as an expression
- like *(%ebp - 8). We've just popped that value from
- the stack, which means we need to reset the CFA to
- the drap register. This will remain until we restore
- the stack pointer. */
- add_reg_note (insn, REG_CFA_DEF_CFA, reg);
- RTX_FRAME_RELATED_P (insn) = 1;
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
- /* This means that the DRAP register is valid for addressing. */
- m->fs.drap_valid = true;
- }
- else
- ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ if (m->fs.cfa_reg == crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ }
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -9913,10 +10137,11 @@ ix86_expand_epilogue (int style)
less work than reloading sp and popping the register. */
else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
restore_regs_via_mov = true;
- else if (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1
- || m->fs.sp_offset != reg_save_offset))
+ else if (crtl->shrink_wrapped_separate
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != reg_save_offset)))
restore_regs_via_mov = true;
else if (frame_pointer_needed
&& !frame.nregs
@@ -9930,6 +10155,9 @@ ix86_expand_epilogue (int style)
else
restore_regs_via_mov = false;
+ if (crtl->shrink_wrapped_separate)
+ gcc_assert (restore_regs_via_mov);
+
if (restore_regs_via_mov || frame.nsseregs)
{
/* Ensure that the entire register save area is addressable via
@@ -9982,6 +10210,7 @@ ix86_expand_epilogue (int style)
gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
gcc_assert (!crtl->drap_reg);
gcc_assert (!frame.nregs);
+ gcc_assert (!crtl->shrink_wrapped_separate);
}
else if (restore_regs_via_mov)
{
@@ -9996,6 +10225,8 @@ ix86_expand_epilogue (int style)
rtx sa = EH_RETURN_STACKADJ_RTX;
rtx_insn *insn;
+ gcc_assert (!crtl->shrink_wrapped_separate);
+
/* Stack realignment doesn't work with eh_return. */
if (crtl->stack_realign_needed)
sorry ("Stack realignment not supported with "
@@ -11177,6 +11408,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
x = XVECEXP (x, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (x, 0, 0);
+ return GET_CODE (x) == SYMBOL_REF;
default:
return false;
}
@@ -11224,7 +11458,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case E_OImode:
case E_XImode:
if (!standard_sse_constant_p (x, mode)
- && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
+ && GET_MODE_SIZE (TARGET_AVX512F
? XImode
: (TARGET_AVX
? OImode
@@ -11313,6 +11547,9 @@ legitimate_pic_operand_p (rtx x)
x = XVECEXP (inner, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (inner, 0, 0);
+ return GET_CODE (x) == SYMBOL_REF;
case UNSPEC_MACHOPIC_OFFSET:
return legitimate_pic_address_disp_p (x);
default:
@@ -11493,6 +11730,9 @@ legitimate_pic_address_disp_p (rtx disp)
disp = XVECEXP (disp, 0, 0);
return (GET_CODE (disp) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ disp = XVECEXP (disp, 0, 0);
+ return GET_CODE (disp) == SYMBOL_REF;
}
return false;
@@ -11770,6 +12010,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
case UNSPEC_INDNTPOFF:
case UNSPEC_NTPOFF:
case UNSPEC_DTPOFF:
+ case UNSPEC_SECREL32:
break;
default:
@@ -11795,7 +12036,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
|| GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
|| !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
|| (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
/* Non-constant pic memory reference. */
return false;
}
@@ -12119,6 +12361,24 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg)
return tp;
}
+/* Construct the SYMBOL_REF for the _tls_index symbol. */
+
+static GTY(()) rtx ix86_tls_index_symbol;
+
+#if TARGET_WIN32_TLS
+static rtx
+ix86_tls_index (void)
+{
+ if (!ix86_tls_index_symbol)
+ ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
+
+ if (flag_pic)
+ return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL));
+ else
+ return ix86_tls_index_symbol;
+}
+#endif
+
/* Construct the SYMBOL_REF for the tls_get_addr function. */
static GTY(()) rtx ix86_tls_symbol;
@@ -12177,6 +12437,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
machine_mode tp_mode = Pmode;
int type;
+#if TARGET_WIN32_TLS
+ off = gen_const_mem (SImode, ix86_tls_index ());
+ set_mem_alias_set (off, GOT_ALIAS_SET);
+
+ tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44));
+ set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG);
+
+ if (TARGET_64BIT)
+ off = convert_to_mode (Pmode, off, 1);
+
+ base = force_reg (Pmode, off);
+ tp = copy_to_mode_reg (Pmode, tp);
+
+ tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD))));
+ set_mem_alias_set (tp, GOT_ALIAS_SET);
+
+ base = force_reg (Pmode, tp);
+
+ return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32)));
+#else
/* Fall back to global dynamic model if tool chain cannot support local
dynamic. */
if (TARGET_SUN_TLS && !TARGET_64BIT
@@ -12230,8 +12510,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
start_sequence ();
emit_call_insn
(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
if (GET_MODE (x) != Pmode)
x = gen_rtx_ZERO_EXTEND (Pmode, x);
@@ -12285,8 +12564,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
start_sequence ();
emit_call_insn
(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
@@ -12399,6 +12677,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
}
return dest;
+#endif
}
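
In rough pseudo-C, the 64-bit TARGET_WIN32_TLS access built above corresponds to the following (a sketch; variable names are illustrative, and gs:0x58 / fs:0x2c is the TEB ThreadLocalStoragePointer slot referenced as 88 / 44 above):

    void **tls_array = (void **) __readgsqword (0x58);   /* load [gs:88]                */
    char  *tls_base  = (char *) tls_array[_tls_index];   /* index scaled by word size   */
    /* result = tls_base + x@secrel32, the variable's offset within the TLS section */
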
/* Return true if the TLS address requires insn using integer registers.
@@ -12868,6 +13147,9 @@ output_pic_addr_const (FILE *file, rtx x, int code)
case UNSPEC_INDNTPOFF:
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
putc ('-', file);
@@ -12893,7 +13175,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
fputs (ASM_LONG, file);
output_addr_const (file, x);
+#if TARGET_WIN32_TLS
+ fputs ("@secrel32", file);
+#else
fputs ("@dtpoff", file);
+#endif
switch (size)
{
case 4:
@@ -13552,10 +13838,11 @@ print_reg (rtx x, int code, FILE *file)
H -- print a memory address offset by 8; used for sse high-parts
Y -- print condition for XOP pcom* instruction.
V -- print naked full integer register name without %.
+ v -- print segment override prefix
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
~ -- print "i" if TARGET_AVX2, "f" otherwise.
- ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+ ^ -- print addr32 prefix if Pmode != word_mode
M -- print addr32 prefix for TARGET_X32 with VSIB address.
! -- print NOTRACK prefix for jxx/call/ret instructions if required.
N -- print maskz if it's constant 0 operand.
@@ -14057,6 +14344,28 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
+ case 'v':
+ if (MEM_P (x))
+ {
+ switch (MEM_ADDR_SPACE (x))
+ {
+ case ADDR_SPACE_GENERIC:
+ break;
+ case ADDR_SPACE_SEG_FS:
+ fputs ("fs ", file);
+ break;
+ case ADDR_SPACE_SEG_GS:
+ fputs ("gs ", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ output_operand_lossage ("operand is not a memory reference, "
+ "invalid operand code 'v'");
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
@@ -14131,7 +14440,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
case '^':
- if (TARGET_64BIT && Pmode != word_mode)
+ if (Pmode != word_mode)
fputs ("addr32 ", file);
return;
@@ -14646,6 +14955,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, op);
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ output_addr_const (file, op);
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
output_addr_const (file, op);
@@ -17898,9 +18211,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
if (cum->decl && !TREE_PUBLIC (cum->decl))
return;
- const_tree ctx = get_ultimate_context (cum->decl);
- if (ctx != NULL_TREE
- && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
+ tree decl = cum->decl;
+ if (!decl)
+ /* If we don't know the target, look at the current TU. */
+ decl = current_function_decl;
+
+ const_tree ctx = get_ultimate_context (decl);
+ if (ctx == NULL_TREE
+ || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
return;
/* If the actual size of the type is zero, then there is no change
@@ -20037,14 +20355,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
{
bool si;
enum ix86_builtins code;
- const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
if (!TARGET_AVX512F)
return NULL_TREE;
- if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
- return NULL_TREE;
-
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
? !TARGET_USE_SCATTER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
@@ -20787,7 +21101,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
return true;
/* x87 registers can't do subreg at all, as all values are reformatted
- to extended precision. */
+ to extended precision.
+
+ ??? The middle-end queries mode changes for ALL_REGS, which makes
+ vec_series_lowpart_p always return false. We probably should
+ restrict this to modes supported by i387 and check if it is enabled. */
if (MAYBE_FLOAT_CLASS_P (regclass))
return false;
@@ -21162,7 +21480,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
- any of 512-bit wide vector mode
- any scalar mode. */
if (TARGET_AVX512F
- && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
+ && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
@@ -21333,19 +21651,20 @@ ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
return mode1 == SFmode;
/* If MODE2 is only appropriate for an SSE register, then tie with
- any other mode acceptable to SSE registers. */
- if (GET_MODE_SIZE (mode2) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 16
+ any vector modes or scalar floating point modes acceptable to SSE
+ registers, excluding scalar integer modes with SUBREG:
+ (subreg:QI (reg:TI 99) 0)
+ (subreg:HI (reg:TI 99) 0)
+ (subreg:SI (reg:TI 99) 0)
+ (subreg:DI (reg:TI 99) 0)
+ to avoid an unnecessary move from an SSE register to an integer
+ register. */
+ if (GET_MODE_SIZE (mode2) >= 16
+ && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
+ || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
+ && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 16
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
+ return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
/* If MODE2 is appropriate for an MMX register, then tie
with any other mode acceptable to MMX registers. */
@@ -21403,7 +21722,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
@@ -21464,7 +21783,7 @@ ix86_widen_mult_cost (const struct processor_costs *cost,
/* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
require extra 4 mul, 4 add, 4 cmp and 2 shift. */
if (!TARGET_SSE4_1 && !uns_p)
- extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
+ extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
+ cost->sse_op * 2;
/* Fallthru. */
case V4DImode:
@@ -21514,11 +21833,11 @@ ix86_multiplication_cost (const struct processor_costs *cost,
else if (TARGET_AVX2)
nops += 2;
else if (TARGET_XOP)
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
else
{
nops += 1;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
@@ -21537,13 +21856,13 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 2;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
else
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
@@ -21556,14 +21875,16 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[3] * 2;
+ /* 2 loads, so no division by 2. */
+ extra += COSTS_N_INSNS (cost->sse_load[3]);
}
goto do_qimode;
case V64QImode:
nmults = 2;
nops = 9;
- extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
+ /* 2 loads of each size, so no division by 2. */
+ extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
do_qimode:
return ix86_vec_cost (mode, cost->mulss * nmults
@@ -21656,7 +21977,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
if (constant_op1)
{
@@ -21687,7 +22008,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
shift with one insn set the cost to prefer paddb. */
if (constant_op1)
{
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
return ix86_vec_cost (mode, cost->sse_op) + extra;
}
else
@@ -21702,7 +22023,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ extra = COSTS_N_INSNS (mode == V16QImode
+ ? cost->sse_load[2]
+ : cost->sse_load[3]) / 2;
if (constant_op1)
{
@@ -21810,6 +22133,34 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+ if (size < 128)
+ return cost->cvtss2sd;
+ else if (size < 256)
+ {
+ if (TARGET_SSE_SPLIT_REGS)
+ return cost->cvtss2sd * size / 64;
+ return cost->cvtss2sd;
+ }
+ if (size < 512)
+ return cost->vcvtps2pd256;
+ else
+ return cost->vcvtps2pd512;
+}
+
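
Reading the new helper directly, for example (the size argument below comes from GET_MODE_BITSIZE of the conversion's result mode):

    vec_fp_conversion_cost (cost,  64)  -> cost->cvtss2sd       /* scalar SFmode -> DFmode     */
    vec_fp_conversion_cost (cost, 256)  -> cost->vcvtps2pd256   /* e.g. V4SF -> V4DF, 256-bit  */
    vec_fp_conversion_cost (cost, 512)  -> cost->vcvtps2pd512   /* e.g. V8SF -> V8DF, 512-bit  */
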
+/* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
+
+static bool
+unspec_pcmp_p (rtx x)
+{
+ return GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -21827,9 +22178,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* Handling different vternlog variants. */
if ((GET_MODE_SIZE (mode) == 64
- ? (TARGET_AVX512F && TARGET_EVEX512)
+ ? TARGET_AVX512F
: (TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
&& GET_MODE_SIZE (mode) >= 16
&& outer_code_i == SET
&& ternlog_operand (x, mode))
@@ -22178,8 +22529,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (ior (not ...) ...) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22270,8 +22620,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* (and (not ...) (not ...)) can be a single insn in AVX512. */
if (GET_CODE (right) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22341,8 +22690,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (not (xor ...)) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22473,17 +22821,39 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+ /* x87 represents all values extended to 80 bits. */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
+ return false;
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+ else
+ *total = cost->cvtsi2ss;
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtps2pi);
+ else
+ *total = cost->cvtss2si;
return false;
case ABS:
@@ -22544,13 +22914,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
- case VEC_SELECT:
case VEC_CONCAT:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
- same cost. */
+ same cost.
+ ??? We should still recurse when computing the cost. */
*total = cost->sse_op;
return true;
+
+ case VEC_SELECT:
+ /* Special case extracting the lower part of a vector.
+ This by itself needs no code, and most SSE/AVX instructions have
+ packed and scalar forms where the scalar form may be represented
+ by such a VEC_SELECT.
+
+ Use cost 1 (despite the fact that the functionally equivalent SUBREG
+ has cost 0). Making VEC_SELECT completely free, for example, instructs
+ CSE to forward propagate the VEC_SELECT into
+
+ (set (reg eax) (reg src))
+
+ which then prevents fwprop and combining. See e.g.
+ gcc.target/i386/pr91103-1.c.
+
+ ??? The rtvec_series_p test should be, for valid patterns, equivalent
+ to vec_series_lowpart_p, but it is not, since the latter calls
+ can_change_mode_class on ALL_REGS and that returns false because x87
+ does not support subregs at all. */
+ if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+ *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+ outer_code, opno, speed) + 1;
+ else
+ /* ??? We should still recruse when computing cost. */
+ *total = cost->sse_op;
+ return true;
+
case VEC_DUPLICATE:
*total = rtx_cost (XEXP (x, 0),
GET_MODE (XEXP (x, 0)),
@@ -22563,13 +22961,87 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
case VEC_MERGE:
mask = XEXP (x, 2);
+ /* Scalar versions of SSE instructions may be represented as:
+
+ (vec_merge (vec_duplicate (operation ....))
+ (register or memory)
+ (const_int 1))
+
+ In this case the vec_merge and vec_duplicate are free.
+ Just recurse into the operation and the second operand. */
+ if (mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+ outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
/* This is masked instruction, assume the same cost,
as nonmasked variant. */
- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
- *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
+ else if (TARGET_AVX512F
+ && (register_operand (mask, GET_MODE (mask))
+ /* Redundant clean-up of the high bits of a kmask with VL=2/4,
+ i.e. (vec_merge op0, op1, (and op3 15)). */
+ || (GET_CODE (mask) == AND
+ && register_operand (XEXP (mask, 0), GET_MODE (mask))
+ && CONST_INT_P (XEXP (mask, 1))
+ && ((INTVAL (XEXP (mask, 1)) == 3
+ && GET_MODE_NUNITS (mode) == 2)
+ || (INTVAL (XEXP (mask, 1)) == 15
+ && GET_MODE_NUNITS (mode) == 4)))))
+ {
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* Combination of the two above:
+
+ (vec_merge (vec_merge (vec_duplicate (operation ...))
+ (register or memory)
+ (reg:QI mask))
+ (register or memory)
+ (const_int 1))
+
+ i.e. avx512fp16_vcvtss2sh_mask. */
+ else if (TARGET_AVX512F
+ && mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_MERGE
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
+ && register_operand (XEXP (XEXP (x, 0), 2),
+ GET_MODE (XEXP (XEXP (x, 0), 2))))
+ {
+ *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* vcmp. */
+ else if (unspec_pcmp_p (mask)
+ || (GET_CODE (mask) == NOT
+ && unspec_pcmp_p (XEXP (mask, 0))))
+ {
+ rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
+ rtx unsop0 = XVECEXP (uns, 0, 0);
+ /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
+	     cost the same as a register.
+ This is used by avx_cmp<mode>3_ltint_not. */
+ if (GET_CODE (unsop0) == SUBREG)
+ unsop0 = XEXP (unsop0, 0);
+ if (GET_CODE (unsop0) == NOT)
+ unsop0 = XEXP (unsop0, 0);
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
+ + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
+ + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
+ + cost->sse_op;
+ return true;
+ }
else
*total = cost->sse_op;
- return true;
+ return false;
case MEM:
/* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
@@ -22586,7 +23058,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
/* An insn that accesses memory is slightly more expensive
- than one that does not. */
+ than one that does not. */
if (speed)
{
*total += 1;
@@ -23158,6 +23630,12 @@ x86_print_call_or_nop (FILE *file, const char *target)
if (flag_nop_mcount || !strcmp (target, "nop"))
/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ else if (!TARGET_PECOFF && flag_pic)
+ {
+ gcc_assert (flag_plt);
+
+ fprintf (file, "1:\tcall\t%s@PLT\n", target);
+ }
else
fprintf (file, "1:\tcall\t%s\n", target);
}
@@ -23321,7 +23799,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
break;
case CM_SMALL_PIC:
case CM_MEDIUM_PIC:
- if (!ix86_direct_extern_access)
+ if (!flag_plt)
{
if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
@@ -23352,7 +23830,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
"\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- if (ASSEMBLER_DIALECT == ASM_INTEL)
+ if (flag_plt)
+ x86_print_call_or_nop (file, mcount_name);
+ else if (ASSEMBLER_DIALECT == ASM_INTEL)
fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
else
fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
@@ -24132,7 +24612,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
- if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
&& VALID_MMX_REG_MODE (mode))
@@ -24380,8 +24860,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (saw_asm_flag)
return seq;
@@ -24669,7 +25148,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24740,7 +25219,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ if (fp)
+ return vec_fp_conversion_cost (ix86_tune_cost, mode);
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
@@ -24753,12 +25236,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* One vinserti128 for combining two SSE vectors for AVX256. */
else if (GET_MODE_BITSIZE (mode) == 256)
return ((n - 2) * ix86_cost->sse_op
- + ix86_vec_cost (mode, ix86_cost->addss));
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- return ((n - 4) * ix86_cost->sse_op
- + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ {
+ machine_mode half_mode
+ = mode_for_vector (GET_MODE_INNER (mode),
+ GET_MODE_NUNITS (mode) / 2).require ();
+ return ((n - 4) * ix86_cost->sse_op
+ + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
+ }
gcc_unreachable ();
}
@@ -24926,7 +25415,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
switch (mode)
{
case E_QImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V64QImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V32QImode;
@@ -24934,7 +25423,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V16QImode;
case E_HImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V32HImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16HImode;
@@ -24942,7 +25431,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8HImode;
case E_SImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SImode;
@@ -24950,7 +25439,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SImode;
case E_DImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DImode;
@@ -24964,16 +25453,15 @@ ix86_preferred_simd_mode (scalar_mode mode)
{
if (TARGET_PREFER_AVX128)
return V8HFmode;
- else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
+ else if (TARGET_PREFER_AVX256)
return V16HFmode;
}
- if (TARGET_EVEX512)
- return V32HFmode;
+ return V32HFmode;
}
return word_mode;
case E_BFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V32BFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16BFmode;
@@ -24981,7 +25469,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8BFmode;
case E_SFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
@@ -24989,7 +25477,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SFmode;
case E_DFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
@@ -25009,13 +25497,13 @@ ix86_preferred_simd_mode (scalar_mode mode)
static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
modes->safe_push (V64QImode);
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
}
- else if (TARGET_AVX512F && TARGET_EVEX512 && all)
+ else if (TARGET_AVX512F && all)
{
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
@@ -25053,7 +25541,7 @@ ix86_get_mask_mode (machine_mode data_mode)
unsigned elem_size = vector_size / nunits;
/* Scalar mask case. */
- if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ if ((TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
/* AVX512FP16 only supports vector comparison
to kmask for _Float16. */
@@ -25261,7 +25749,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (X87_FLOAT_MODE_P (mode))
stmt_cost = ix86_cost->fadd;
else
- stmt_cost = ix86_cost->add;
+ stmt_cost = ix86_cost->add;
}
else
stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
@@ -25316,7 +25804,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(subcode == RSHIFT_EXPR
&& !TYPE_UNSIGNED (TREE_TYPE (op1)))
? ASHIFTRT : LSHIFTRT, mode,
- TREE_CODE (op2) == INTEGER_CST,
+ TREE_CODE (op2) == INTEGER_CST,
cst_and_fits_in_hwi (op2)
? int_cst_value (op2) : -1,
false, false, NULL, NULL);
@@ -25325,27 +25813,174 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case NOP_EXPR:
/* Only sign-conversions are free. */
if (tree_nop_conversion_p
- (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ break;
+
+ case FLOAT_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtsi2ss;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ break;
+
+ case FIX_TRUNC_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtss2si;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ break;
+
+ case COND_EXPR:
+ {
+	  /* The SSE2 conditional move sequence is:
+ pcmpgtd %xmm5, %xmm0 (accounted separately)
+ pand %xmm0, %xmm2
+ pandn %xmm1, %xmm0
+ por %xmm2, %xmm0
+ while SSE4 uses cmp + blend
+ and AVX512 masked moves.
+
+ The condition is accounted separately since we usually have
+ p = a < b
+ c = p ? x : y
+	     and we will account the first statement as setcc.  An exception
+	     is when p is loaded from memory as a bool; then we will not
+	     account for the compare, but there is no way to check for this. */
+
+ int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+	  /* If one of the parameters is 0 or -1 the sequence will be simplified:
+ (if_true & mask) | (if_false & ~mask) -> if_true & mask */
+ if (ninsns > 1
+ && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs3 (stmt_info->stmt))))
+ ninsns = 1;
+
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ninsns * ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+	    /* x87 requires a conditional branch.  We don't have a cost
+	       for that.  */
+ ;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+ else
+ /* compare (accounted separately) + cmov. */
+ stmt_cost = ix86_cost->add;
+ }
break;
- case BIT_IOR_EXPR:
- case ABS_EXPR:
- case ABSU_EXPR:
case MIN_EXPR:
case MAX_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	      /* x87 requires a conditional branch.  We don't have a cost
+		 for that.  */
+ ;
+ else
+ /* minss */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vpmin was introduced in SSE3.
+ SSE2 needs pcmpgtd + pand + pandn + pxor.
+		     If one of the parameters is 0 or -1 the sequence is simplified
+ to pcmpgtd + pand. */
+ if (!TARGET_SSSE3)
+ {
+ if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt)))
+ stmt_cost *= 2;
+ else
+ stmt_cost *= 4;
+ }
+ }
+ else
+ /* cmp + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* fabs. */
+ stmt_cost = ix86_cost->fabs;
+ else
+	      /* andps masking off the sign bit.  */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+		  /* vabs (pabs*) was introduced in SSSE3.
+		     Without it we use psrad + pxor + psub.  */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 3;
+ }
+ else
+ /* neg + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case BIT_NOT_EXPR:
- if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
- stmt_cost = ix86_cost->sse_op;
- else if (VECTOR_MODE_P (mode))
+ gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+ && !X87_FLOAT_MODE_P (mode));
+ if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
else
stmt_cost = ix86_cost->add;
break;
+
default:
+ if (truth_value_p (subcode))
+ {
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	      /* CMPccS? instructions are cheap, so use sse_op.  While they
+		 produce a mask which may need to be turned into 0/1 by an
+		 and, expect that this will be optimized away in the common
+		 case.  */
+ stmt_cost = ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* fcmp + setcc. */
+ stmt_cost = ix86_cost->fadd + ix86_cost->add;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ else
+ /* setcc. */
+ stmt_cost = ix86_cost->add;
+ break;
+ }
break;
}
}
@@ -25369,6 +26004,37 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ if (kind == vec_promote_demote)
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ bool inner_fp = FLOAT_TYPE_P
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+ if (fp && inner_fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ else if (fp && !inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ else if (!fp && inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+      /* VEC_PACK_TRUNC_EXPR and similar demote operations: if the inner size
+	 is greater than the outer size we will end up doing two conversions
+	 and packing them.  We always pack pairs; if the size difference is
+	 greater it is split into multiple demote operations.  */
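+      /* E.g. an int -> short demotion (inner_size 32, outer_size 16) does
+	 two conversions and one pack, hence the doubling plus one extra
+	 sse_op below (illustrative).  */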
+ if (inner_size > outer_size)
+ stmt_cost = stmt_cost * 2
+ + ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
@@ -25439,7 +26105,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else
{
m_num_gpr_needed[where]++;
- stmt_cost += ix86_cost->sse_to_integer;
+
+ int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+
+ /* For integer construction, the number of actual GPR -> XMM
+ moves will be somewhere between 0 and n.
+	     We do not have a very good idea about the actual number, since
+	     the source may be a constant, memory, or a chain of
+	     instructions that will later be converted by the
+	     scalar-to-vector pass.  */
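+	  /* As a rough heuristic, scale the cost with vector width: twice
+	     for 256-bit and three times for 512-bit constructions.  */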
+ if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 256)
+ cost *= 2;
+ else if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 512)
+ cost *= 3;
+ stmt_cost += cost;
}
}
}
@@ -25531,14 +26212,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
a AVX2 and a SSE epilogue for AVX512 vectorized loops. */
if (loop_vinfo
+ && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
- {
- if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
- m_suggested_epilogue_mode = V32QImode;
- else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
- m_suggested_epilogue_mode = V16QImode;
- }
+ m_suggested_epilogue_mode = V16QImode;
/* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
enable a 64bit SSE epilogue. */
if (loop_vinfo
@@ -25666,7 +26343,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
clonei->vecsize_mangle = 'e';
else if (TARGET_AVX2)
clonei->vecsize_mangle = 'd';
@@ -25758,17 +26435,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
return -1;
if (!TARGET_AVX)
return 0;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
+ return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
case 'c':
if (!TARGET_AVX)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
+ return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
case 'd':
if (!TARGET_AVX2)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
+ return TARGET_AVX512F ? 1 : 0;
case 'e':
- if (!TARGET_AVX512F || !TARGET_EVEX512)
+ if (!TARGET_AVX512F)
return -1;
return 0;
default:
@@ -27440,6 +28117,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
+sbitmap
+ix86_get_separate_components (void)
+{
+ HOST_WIDE_INT offset, to_allocate;
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+ struct machine_function *m = cfun->machine;
+
+ offset = m->frame.stack_pointer_offset;
+ to_allocate = offset - m->frame.sse_reg_save_offset;
+
+ /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
+ Experiments show that APX PPX can speed up the prologue. If the function
+ does not exit early during actual execution, then using APX PPX is faster.
+ If the function always exits early during actual execution, then shrink
+ wrap separate reduces the number of MOV (PUSH/POP) instructions actually
+ executed, thus speeding up execution.
+ foo:
+ movl $1, %eax
+ testq %rdi, %rdi
+	jne	.L60
+ ret ---> early return.
+ .L60:
+ subq $88, %rsp ---> belong to prologue.
+ xorl %eax, %eax
+ movq %rbx, 40 (%rsp) ---> belong to prologue.
+ movq 8 (%rdi), %rbx
+ movq %rbp, 48 (%rsp) ---> belong to prologue.
+ movq %rdi, %rbp
+ testq %rbx, %rbx
+	jne	.L61
+ movq 40 (%rsp), %rbx
+ movq 48 (%rsp), %rbp
+ addq $88, %rsp
+ ret
+ .L61:
+ movq %r12, 56 (%rsp) ---> belong to prologue.
+ movq %r13, 64 (%rsp) ---> belong to prologue.
+ movq %r14, 72 (%rsp) ---> belong to prologue.
+ ... ...
+
+ Disable shrink wrap separate when PPX is enabled. */
+ if ((TARGET_APX_PPX && !crtl->calls_eh_return)
+ || cfun->machine->func_type != TYPE_NORMAL
+ || TARGET_SEH
+ || crtl->stack_realign_needed
+ || m->call_ms2sysv)
+ return components;
+
+  /* Since shrink-wrapping separate uses MOV instead of PUSH/POP,
+     disable shrink-wrap separate when MOV is prohibited.  */
+ if (save_regs_using_push_pop (to_allocate))
+ return components;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ /* Skip registers with large offsets, where a pseudo may be needed. */
+ if (IN_RANGE (offset, -0x8000, 0x7fff))
+ bitmap_set_bit (components, regno);
+ offset += UNITS_PER_WORD;
+ }
+
+ /* Don't mess with the following registers. */
+ if (frame_pointer_needed)
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+ if (crtl->drap_reg)
+ bitmap_clear_bit (components, REGNO (crtl->drap_reg));
+
+ if (pic_offset_table_rtx)
+ bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
+sbitmap
+ix86_components_for_bb (basic_block bb)
+{
+ bitmap in = DF_LIVE_IN (bb);
+ bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
+ bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
+
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
+ /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!fixed_regs[regno]
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+ || bitmap_bit_p (in, regno)
+ || bitmap_bit_p (gen, regno)
+ || bitmap_bit_p (kill, regno)))
+ bitmap_set_bit (components, regno);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
+void
+ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+ /* Nothing to do for x86. */
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
+void
+ix86_emit_prologue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
+void
+ix86_emit_epilogue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ {
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
+
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ }
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
+void
+ix86_set_handled_components (sbitmap components)
+{
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (bitmap_bit_p (components, regno))
+ {
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+ cfun->machine->use_fast_prologue_epilogue = true;
+ cfun->machine->frame.save_regs_using_mov = true;
+ }
+}
+
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+ ix86_emit_prologue_components
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+ ix86_emit_epilogue_components
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..7c16eac 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -179,6 +179,7 @@ struct processor_costs {
const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
zmm_move;
const int sse_to_integer; /* cost of moving SSE register to integer. */
+ const int integer_to_sse; /* cost of moving integer register to SSE. */
const int gather_static, gather_per_elt; /* Cost of gather load is computed
as static + per_item * nelts. */
const int scatter_static, scatter_per_elt; /* Cost of gather store is
@@ -207,6 +208,16 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+  const int cvtss2sd;		/* cost of SSE FP conversions,
+				   such as CVTSS2SD.  */
+  const int vcvtps2pd256;	/* cost of 256bit packed FP conversions,
+				   such as VCVTPD2PS with the larger reg in ymm.  */
+  const int vcvtps2pd512;	/* cost of 512bit packed FP conversions,
+				   such as VCVTPD2PS with the larger reg in zmm.  */
+ const int cvtsi2ss; /* cost of CVTSI2SS instruction. */
+ const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */
+ const int cvtpi2ps; /* cost of CVTPI2PS instruction. */
+ const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
@@ -479,7 +490,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_SSE_MOVCC_USE_BLENDV \
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
#define TARGET_ALIGN_TIGHT_LOOPS \
- ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+ ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+#define TARGET_SSE_REDUCTION_PREFER_PSHUF \
+ ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF]
/* Feature tests against the various architecture variations. */
@@ -525,6 +538,7 @@ extern unsigned char ix86_prefetch_sse;
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS 0
+#define TARGET_WIN32_TLS 0
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
@@ -804,7 +818,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */
#define BIGGEST_ALIGNMENT \
- (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \
+ (TARGET_IAMCU ? 32 : (TARGET_AVX512F \
? 512 : (TARGET_AVX ? 256 : 128)))
/* Maximum stack alignment. */
@@ -1883,7 +1897,7 @@ typedef struct ix86_args {
MOVE_MAX_PIECES defaults to MOVE_MAX. */
#define MOVE_MAX \
- ((TARGET_AVX512F && TARGET_EVEX512\
+ ((TARGET_AVX512F \
&& (ix86_move_max == PVW_AVX512 \
|| ix86_store_max == PVW_AVX512)) \
? 64 \
@@ -1902,7 +1916,7 @@ typedef struct ix86_args {
store_by_pieces of 16/32/64 bytes. */
#define STORE_MAX_PIECES \
(TARGET_INTER_UNIT_MOVES_TO_VEC \
- ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \
+ ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \
? 64 \
: ((TARGET_AVX \
&& ix86_store_max >= PVW_AVX256) \
@@ -2255,6 +2269,13 @@ extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
} while (0)
#endif
+/* In Intel syntax, we have to quote user-defined labels that would
+ match (unprefixed) registers or operators. */
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ ix86_asm_output_labelref ((STREAM), user_label_prefix, (NAME))
+
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. */
@@ -2396,13 +2417,13 @@ constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES
| PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_CLWB | PTA_EVEX512;
+ | PTA_CLWB;
constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512
| PTA_AVX512VNNI;
constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16;
constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_EVEX512;
+ | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
| PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
| PTA_RDPID | PTA_AVX512VPOPCNTDQ;
@@ -2432,7 +2453,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
| PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR;
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
- | PTA_PREFETCHI;
+ | PTA_PREFETCHI | PTA_AVX10_1;
constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
| PTA_AMX_COMPLEX;
constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST;
@@ -2444,16 +2465,11 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST
| PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR
| PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI;
-constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA
- | PTA_GFNI | PTA_VAES | PTA_VPCLMULQDQ | PTA_RDPID | PTA_PCONFIG
- | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
- | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
- | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
- | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
- | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
- | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
- | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
- | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D
+ | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
+ | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
+ | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
+ | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
@@ -2480,7 +2496,7 @@ constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
| PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL
| PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
- | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512;
+ | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI
| PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI;
@@ -2805,6 +2821,10 @@ struct GTY(()) machine_function {
/* Cached initial frame layout for the current function. */
struct ix86_frame frame;
+ /* The components already handled by separate shrink-wrapping, which should
+ not be considered by the prologue and epilogue. */
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
+
/* For -fsplit-stack support: A stack local which holds a pointer to
the stack arguments for a function with a variable number of
arguments. This is set at the start of the function and is used
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f29..423ef48 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -58,10 +58,11 @@
;; H -- print a memory address offset by 8; used for sse high-parts
;; K -- print HLE lock prefix
;; Y -- print condition for XOP pcom* instruction.
+;; v -- print segment override prefix
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
-;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+;; ^ -- print addr32 prefix if Pmode != word_mode
;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
(define_c_enum "unspec" [
@@ -79,6 +80,7 @@
UNSPEC_MACHOPIC_OFFSET
UNSPEC_PCREL
UNSPEC_SIZEOF
+ UNSPEC_SECREL32
;; Prologue support
UNSPEC_STACK_ALLOC
@@ -579,12 +581,11 @@
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
- avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
- noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
- noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
- avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
- avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl,noapx_nf,avx10_2"
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
+ avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
+ avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl,
+ avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -954,12 +955,8 @@
(eq_attr "isa" "fma_or_avx512vl")
(symbol_ref "TARGET_FMA || TARGET_AVX512VL")
(eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
- (eq_attr "isa" "avx512f_512")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
- (eq_attr "isa" "avx512bw_512")
- (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
(eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
(eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
@@ -1495,7 +1492,7 @@
[(reg:CC FLAGS_REG) (const_int 0)])
(label_ref (match_operand 3))
(pc)))]
- "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
+ "TARGET_AVX512F && !TARGET_PREFER_AVX256"
{
ix86_expand_branch (GET_CODE (operands[0]),
operands[1], operands[2], operands[3]);
@@ -1602,6 +1599,20 @@
[(set_attr "type" "icmp")
(set_attr "mode" "<MODE>")])
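+;; Match a compare of (plus:SWI x (const_int -C)) against zero.  Since the
+;; condition requires CCGOCmode (no carry/overflow consumers), this can be
+;; output as "cmp $C, x", which yields the same SF/ZF result as x + (-C).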
+(define_insn "*cmp<mode>_plus_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m")
+ (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+{
+ operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode);
+ return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*cmpqi_ext<mode>_1"
[(set (reg FLAGS_REG)
(compare
@@ -2374,7 +2385,7 @@
(define_expand "movxi"
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_expand_vector_move (XImode, operands); DONE;")
(define_expand "movoi"
@@ -2427,22 +2438,32 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "0")])
-(define_insn "*mov<mode>_and"
+;; Generate shorter "and $0,mem" for -Oz.  Split it into "mov $0,mem"
+;; otherwise.
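+;; ("and" accepts a sign-extended 8-bit immediate, so "and $0,mem" encodes
+;; shorter than "mov $0,mem", which needs a full-width immediate; the
+;; trade-off is the FLAGS clobber, so the "and" form is only kept for -Oz.)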
+(define_insn_and_split "*mov<mode>_and"
[(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"and{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
-(define_insn "*mov<mode>_or"
+;; Generate shorter "or $-1,mem" for -Oz.  Split it into "mov $-1,mem"
+;; otherwise.
+(define_insn_and_split "*mov<mode>_or"
[(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
(match_operand:SWI248 1 "constm1_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"or{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
@@ -2450,7 +2471,7 @@
(define_insn "*movxi_internal_avx512f"
[(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
(match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
{
@@ -2947,6 +2968,7 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
+ && operands[1] != constm1_rtx
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
@@ -4414,7 +4436,7 @@
(eq_attr "alternative" "11")
(const_string "DI")
(eq_attr "alternative" "5")
- (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
+ (cond [(and (match_test "TARGET_AVX512F")
(not (match_test "TARGET_PREFER_AVX256")))
(const_string "V16SF")
(match_test "TARGET_AVX")
@@ -5482,7 +5504,7 @@
(set_attr "memory" "none")
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(const_string "*")))])
@@ -5704,7 +5726,7 @@
/* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN quietly,
and it always round to even.
- flag_unsafte_math_optimization is needed for psrld.
+ flag_unsafe_math_optimization is needed for psrld.
If we don't expect qNaNs nor sNaNs and can assume rounding
to nearest, we can expand the conversion inline as
(fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */
@@ -8708,6 +8730,34 @@
(set (match_dup 1)
(minus:SWI (match_dup 1) (match_dup 0)))])])
+;; Under APX NDD, 'sub reg, mem, reg' is valid.
+;; New format for
+;; mov reg0, mem1
+;; sub reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sub mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SWI 2 "memory_operand")
+ (match_dup 0)))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 2) (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 2) (match_dup 0)))
+ (set (match_dup 2)
+ (minus:SWI (match_dup 2) (match_dup 0)))])])
+
;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
;; subl $1, %eax; jnc .Lxx;
(define_peephole2
@@ -9155,6 +9205,118 @@
(match_dup 1))
(match_dup 0)))])])
+;; Under APX NDD, 'adc reg, mem, reg' is valid.
+;;
+;; New format for
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem1, reg0
+;; to
+;; mov reg0, mem2
+;; adc mem1, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 1) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])])
+
+;; New format for
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; adc mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])])
+
(define_peephole2
[(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
@@ -9635,6 +9797,52 @@
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])])
+;; Under APX NDD, 'sbb reg, mem, reg' is valid.
+;;
+;; New format for
+;; mov reg0, mem1
+;; sbb reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sbb mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand"))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_dup 0)))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 2)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 2))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (minus:SWI48 (minus:SWI48 (match_dup 2)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])])
+
(define_peephole2
[(set (match_operand:SWI48 6 "general_reg_operand")
(match_operand:SWI48 7 "memory_operand"))
@@ -21315,11 +21523,12 @@
(set_attr "mode" "SI")])
; As bsr is undefined behavior on zero and for other input
-; values it is in range 0 to 63, we can optimize away sign-extends.
-(define_insn_and_split "*bsr_rex64_2"
+; values it is in range 0 to 63, we can optimize away sign-extends
+; or zero-extends.
+(define_insn_and_split "*bsr_rex64<u>_2"
[(set (match_operand:DI 0 "register_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI
(const_int 63)
(subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21341,9 +21550,9 @@
operands[3] = lowpart_subreg (SImode, operands[2], DImode);
})
-(define_insn_and_split "*bsr_2"
+(define_insn_and_split "*bsr<u>_2"
[(set (match_operand:DI 0 "register_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI
(const_int 31)
@@ -21420,7 +21629,7 @@
(minus:DI
(match_operand:DI 2 "const_int_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI (const_int 63)
(subreg:SI
(clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21450,7 +21659,7 @@
[(set (match_operand:DI 0 "register_operand")
(minus:DI
(match_operand:DI 2 "const_int_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI (const_int 31)
(clz:SI (match_operand:SI 1 "nonimmediate_operand")))
@@ -25587,10 +25796,6 @@
(clobber (reg:CC FLAGS_REG))])]
""
{
- /* Can't use this for non-default address spaces. */
- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
- FAIL;
-
int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
/* If .md ever supports :P for Pmode, these can be directly
@@ -25598,9 +25803,14 @@
operands[5] = plus_constant (Pmode, operands[0], piece_size);
operands[6] = plus_constant (Pmode, operands[2], piece_size);
- /* Can't use this if the user has appropriated esi or edi. */
+  /* Can't use this if the user has appropriated esi or edi,
+     or if the destination is in a non-default address space,
+     since string insns cannot override the destination segment.  */
if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
- && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1]))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))
+ || Pmode == word_mode))
{
emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3],
@@ -25635,8 +25845,15 @@
(const_int 8)))]
"TARGET_64BIT
&& !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsq"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsq";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "DI")])
@@ -25651,8 +25868,15 @@
(plus:P (match_dup 3)
(const_int 4)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movs{l|d}"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "SI")])
@@ -25667,8 +25891,15 @@
(plus:P (match_dup 3)
(const_int 2)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsw"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsw";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "HI")])
@@ -25683,8 +25914,15 @@
(plus:P (match_dup 3)
(const_int 1)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsb"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsb";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set (attr "prefix_rex")
@@ -25723,8 +25961,15 @@
(use (match_dup 5))]
"TARGET_64BIT
&& !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsq"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsq";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25743,8 +25988,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movs{l|d}"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25761,8 +26013,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsb"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsb";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25844,7 +26103,8 @@
(unspec [(const_int 0)] UNSPEC_STOS)]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosq"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25858,7 +26118,8 @@
(const_int 4)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stos{l|d}"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25872,7 +26133,8 @@
(const_int 2)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25886,7 +26148,8 @@
(const_int 1)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25922,7 +26185,8 @@
(use (match_dup 4))]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosq"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25940,7 +26204,8 @@
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stos{l|d}"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25957,7 +26222,8 @@
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -26224,8 +26490,8 @@
(define_expand "mov<mode>cc"
[(set (match_operand:SWIM 0 "register_operand")
(if_then_else:SWIM (match_operand 1 "comparison_operator")
- (match_operand:SWIM 2 "<general_operand>")
- (match_operand:SWIM 3 "<general_operand>")))]
+ (match_operand:SWIM 2 "general_operand")
+ (match_operand:SWIM 3 "general_operand")))]
""
"if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
@@ -26592,8 +26858,8 @@
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
(match_operand 1 "comparison_operator")
- (match_operand:X87MODEF 2 "register_operand")
- (match_operand:X87MODEF 3 "register_operand")))]
+ (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand")
+ (match_operand:X87MODEF 3 "nonimm_or_0_operand")))]
"(TARGET_80387 && TARGET_CMOVE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
@@ -27197,6 +27463,28 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
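+;; Adjust the stack with lea (or a plain mov when the adjustment is zero).
+;; Unlike the plain add form, this leaves the FLAGS register untouched,
+;; hence the "_nocc" suffix.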
+(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (plus:P (match_operand:P 1 "register_operand" "r")
+ (match_operand:P 2 "<nonmemory_operand>" "l<i>")))
+ (clobber (mem:BLK (scratch)))]
+ ""
+{
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else
+ {
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
+ }
+}
+ [(set (attr "length_immediate")
+ (cond [(eq_attr "type" "imov")
+ (const_string "0")
+ ]
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
(define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
(minus:P (match_operand:P 1 "register_operand" "0")
@@ -28144,6 +28432,41 @@
const0_rtx);
})
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like the cmpelim-optimized pattern.
+;; Remove the extra mov instruction, e.g. from
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; to
+;; decl (%rdi)
+(define_peephole2
+ [(parallel [(set (reg FLAGS_REG)
+ (compare (match_operator:SWI 2 "plusminuslogic_operator"
+ [(match_operand:SWI 0 "memory_operand")
+ (match_operand:SWI 1 "<nonmemory_operand>")])
+ (const_int 0)))
+ (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+ (set (match_dup 0) (match_dup 3))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (2, operands[3])
+ && !reg_overlap_mentioned_p (operands[3], operands[0])
+ && ix86_match_ccmode (peep2_next_insn (0),
+ (GET_CODE (operands[2]) == PLUS
+ || GET_CODE (operands[2]) == MINUS)
+ ? CCGOCmode : CCNOmode)"
+ [(parallel [(set (match_dup 4) (match_dup 6))
+ (set (match_dup 0) (match_dup 5))])]
+{
+ operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+ operands[5]
+ = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ copy_rtx (operands[0]), operands[1]);
+ operands[6]
+ = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+ const0_rtx);
+})
+
;; Likewise for instances where we have a lea pattern.
(define_peephole2
[(set (match_operand:SWI 0 "register_operand")
@@ -28237,6 +28560,54 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Remove two mov instructions and one cmp instruction:
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;;	movq %rdx, (%rdi)
+;;	cmpq %rsi, %rax
+;; jne
+;; to
+;;	xorq %rsi, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI 0 "register_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_operand:SWI 4 "register_operand")
+ (xor:SWI (match_operand:SWI 3 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_dup 4))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI 5 "register_operand")
+ (match_operand:SWI 6 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && (rtx_equal_p (operands[0], operands[5])
+ ? rtx_equal_p (operands[2], operands[6])
+ : rtx_equal_p (operands[2], operands[5])
+ && rtx_equal_p (operands[0], operands[6]))
+ && peep2_reg_dead_p (3, operands[4])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], QImode)
+ || any_QIreg_operand (operands[2], QImode))"
+ [(parallel [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 1) (match_dup 8))])]
+{
+ operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ operands[2]);
+ operands[9]
+ = gen_rtx_COMPARE (GET_MODE (operands[7]),
+ copy_rtx (operands[8]),
+ const0_rtx);
+})
+
(define_peephole2
[(set (match_operand:SWI12 0 "register_operand")
(match_operand:SWI12 1 "memory_operand"))
@@ -28480,6 +28851,58 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Remove two mov instructions and one cmp instruction:
+;; from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;;	xorb %sil, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI12 0 "register_operand")
+ (match_operand:SWI12 1 "memory_operand"))
+ (parallel [(set (match_operand:SI 4 "register_operand")
+ (xor:SI (match_operand:SI 3 "register_operand")
+ (match_operand:SI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI12 6 "register_operand")
+ (match_operand:SWI12 7 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && REGNO (operands[5]) == REGNO (operands[4])
+ && (rtx_equal_p (operands[0], operands[6])
+ ? (REG_P (operands[2])
+ ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+ : rtx_equal_p (operands[2], operands[7]))
+ : (rtx_equal_p (operands[0], operands[7])
+ && REG_P (operands[2])
+ && REGNO (operands[2]) == REGNO (operands[6])))
+ && peep2_reg_dead_p (3, operands[5])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], SImode)
+ || any_QIreg_operand (operands[2], SImode))"
+ [(parallel [(set (match_dup 8) (match_dup 10))
+ (set (match_dup 1) (match_dup 9))])]
+{
+ operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ gen_lowpart (<MODE>mode, operands[2]));
+ operands[10]
+ = gen_rtx_COMPARE (GET_MODE (operands[8]),
+ copy_rtx (operands[9]),
+ const0_rtx);
+})
+
;; Attempt to optimize away memory stores of values the memory already
;; has. See PR79593.
(define_peephole2
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 27d34bd..c93c0b1 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -36,13 +36,6 @@ HOST_WIDE_INT ix86_isa_flags_explicit
Variable
HOST_WIDE_INT ix86_isa_flags2_explicit
-; Indicate if AVX512 and AVX10.1 are explicitly set no.
-Variable
-int ix86_no_avx512_explicit = 0
-
-Variable
-int ix86_no_avx10_1_explicit = 0
-
; Additional target flags
Variable
int ix86_target_flags
@@ -103,14 +96,6 @@ HOST_WIDE_INT x_ix86_isa_flags2_explicit
TargetSave
HOST_WIDE_INT x_ix86_isa_flags_explicit
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx512_explicit
-
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx10_1_explicit
-
;; whether -mtune was not specified
TargetSave
unsigned char tune_defaulted
@@ -721,13 +706,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
msse4
-Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
+Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
-mno-sse4
-Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save
-Do not support SSE4.1 and SSE4.2 built-in functions and code generation.
-
msse5
Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed)
;; Deprecated
@@ -1355,38 +1336,24 @@ mapx-inline-asm-use-gpr32
Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
Enable GPR32 in inline asm when APX_F enabled.
-mevex512
-Target Mask(ISA2_EVEX512) Var(ix86_isa_flags2) Save Warn(%<-mevex512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support 512 bit vector built-in functions and code generation.
-
musermsr
Target Mask(ISA2_USER_MSR) Var(ix86_isa_flags2) Save
Support USER_MSR built-in functions and code generation.
-mavx10.1-256
-Target Mask(ISA2_AVX10_1_256) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-256 built-in functions and code generation.
-
mavx10.1
-Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
-
-mavx10.1-512
-Target Alias(mavx10.1)
+Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
+and AVX10.1 built-in functions and code generation.
mavx10.2
Target Mask(ISA2_AVX10_2) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-AVX10.1-512 and AVX10.2 built-in functions and code generation.
+AVX10.1 and AVX10.2 built-in functions and code generation.
mamx-avx512
Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512,
-AVX10.2 and AMX-AVX512 built-in functions and code generation.
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+AVX10.1, AVX10.2 and AMX-AVX512 built-in functions and code generation.
mamx-tf32
Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 0d5a5a1..cce524c 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -590,21 +590,12 @@ UrlSuffix(gcc/x86-Options.html#index-mapxf)
mapx-inline-asm-use-gpr32
UrlSuffix(gcc/x86-Options.html#index-mapx-inline-asm-use-gpr32)
-mevex512
-UrlSuffix(gcc/x86-Options.html#index-mevex512)
-
musermsr
UrlSuffix(gcc/x86-Options.html#index-musermsr)
-mavx10.1-256
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256)
-
mavx10.1
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1)
-mavx10.1-512
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512)
-
mavx10.2
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2)
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index c30a4e0..b195fe5 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -148,24 +148,14 @@
#include <avx10_2mediaintrin.h>
-#include <avx10_2-512mediaintrin.h>
-
#include <avx10_2convertintrin.h>
-#include <avx10_2-512convertintrin.h>
-
#include <avx10_2bf16intrin.h>
-#include <avx10_2-512bf16intrin.h>
-
#include <avx10_2satcvtintrin.h>
-#include <avx10_2-512satcvtintrin.h>
-
#include <avx10_2minmaxintrin.h>
-#include <avx10_2-512minmaxintrin.h>
-
#include <avx10_2copyintrin.h>
#include <movrsintrin.h>
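
With the -256/-512 option split and the separate avx10_2-512* headers gone, a single -mavx10.1 or -mavx10.2 build is expected to expose the full 512-bit intrinsic surface directly from the merged headers. A minimal sketch, assuming nothing beyond the documented intrinsics:

#include <immintrin.h>

/* Built with, e.g.:  gcc -O2 -mavx10.2 -c scale.c  */
__m512
scale (__m512 v, __m512 s)
{
  return _mm512_mul_ps (v, s);  /* 512-bit op, no -mavx10.1-512 needed */
}
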
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3d3848c..1bd63b2 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -218,6 +218,7 @@
case UNSPEC_DTPOFF:
case UNSPEC_GOTNTPOFF:
case UNSPEC_NTPOFF:
+ case UNSPEC_SECREL32:
return true;
default:
break;
@@ -392,6 +393,23 @@
return false;
})
+;; Return true if VALUE is a constant integer whose negation satisfies
+;; x86_64_immediate_operand.
+(define_predicate "x86_64_neg_const_int_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT val = -UINTVAL (op);
+ if (mode == DImode && trunc_int_for_mode (val, SImode) != val)
+ return false;
+ if (flag_cf_protection & CF_BRANCH)
+ {
+ unsigned HOST_WIDE_INT endbr = TARGET_64BIT ? 0xfa1e0ff3 : 0xfb1e0ff3;
+ if ((val & HOST_WIDE_INT_C (0xffffffff)) == endbr)
+ return false;
+ }
+ return true;
+})
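
A host-side sketch that mirrors the acceptance test above (the helper name and parameters are illustrative, not GCC internals): the negated constant must still be a sign-extended 32-bit immediate in DImode, and under -fcf-protection=branch it must not carry the ENDBR64/ENDBR32 byte pattern in its low 32 bits, so that encoding never appears as immediate data in the instruction stream:

#include <stdbool.h>
#include <stdint.h>

static bool
neg_const_ok (uint64_t cst, bool dimode, bool cf_branch, bool target_64bit)
{
  uint64_t val = -cst;                                  /* mirrors -UINTVAL (op) */
  uint64_t sext = (uint64_t) (int64_t) (int32_t) (uint32_t) val;
  if (dimode && sext != val)
    return false;                                       /* not a valid imm32 */
  if (cf_branch)
    {
      uint64_t endbr = target_64bit ? 0xfa1e0ff3u : 0xfb1e0ff3u;
      if ((val & 0xffffffffu) == endbr)
        return false;                                   /* would spell ENDBR */
    }
  return true;
}
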
+
;; Return true if VALUE is a constant integer whose low and high words satisfy
;; x86_64_immediate_operand.
(define_predicate "x86_64_hilo_int_operand"
@@ -1267,12 +1285,19 @@
(match_operand 0 "vector_memory_operand")
(match_code "const_vector")))
+; Return true when OP is a register_operand, a vector_memory_operand,
+; a const_vector of zeros, or a const_vector of all ones.
+(define_predicate "vector_or_0_or_1s_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "vector_memory_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
(define_predicate "bcst_mem_operand"
(and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
(ior (match_test "TARGET_AVX512VL")
- (and (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")
- (match_test "TARGET_EVEX512"))))
+ (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
(match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
(match_test "GET_MODE (XEXP (op, 0))
== GET_MODE_INNER (GET_MODE (op))")
@@ -1333,6 +1358,12 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
+; Return true when OP is a nonimmediate operand, constant zero or
+; constant all ones.
+(define_predicate "nonimm_or_0_or_1s_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
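
These two widened predicates feed the vcond_mask expanders later in this patch, letting a blend whose "true" arm is the all-ones vector keep that constant in place instead of forcing it into a register. A hedged sketch of a loop the vectorizer is assumed to turn into such a blend (plain C, GNU defaults):

void
mark_less (int *restrict out, const int *a, const int *b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] < b[i] ? -1 : out[i];   /* true arm is all-ones */
}
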
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b280676..252ba07 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,63 +279,63 @@
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator VMOVE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX") V1TI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All AVX-512{F,VL} vector modes without HF. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator V48_256_512_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL")])
;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48H_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
(define_mode_iterator VI12_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI12HFBF_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
- (V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All vector modes
(define_mode_iterator V
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; All 128bit vector modes
(define_mode_iterator V_128
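
The pattern across all of these iterator hunks is the same: the "&& TARGET_EVEX512" qualifiers disappear because 512-bit vector support is again implied by the base AVX512/AVX10 ISA flags, with the separate -mevex512 gate removed in the i386.opt change above. A minimal sketch of what that means at the source level, assuming only the documented intrinsics:

#include <immintrin.h>

/* gcc -O2 -mavx512f -c add16.c   (no separate -mevex512 gate anymore) */
__m512
add16 (__m512 a, __m512 b)
{
  return _mm512_add_ps (a, b);
}
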
@@ -352,54 +352,44 @@
;; All 512bit vector modes
(define_mode_iterator V_512
- [(V64QI "TARGET_EVEX512") (V32HI "TARGET_EVEX512")
- (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V64QI V32HI V16SI V8DI
+ V16SF V8DF V32HF V32BF])
;; All 256bit and 512bit vector modes
(define_mode_iterator V_256_512
[V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
- (V32BF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512F") (V32BF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; All vector float modes
(define_mode_iterator VF
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF1_VF2_AVX512DQ
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512DQ")
(V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
(V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
-(define_mode_iterator VF1_VF2_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
-
(define_mode_iterator VFH
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF_BHSD
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F")
(V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V32BF "TARGET_AVX10_2")
(V16BF "TARGET_AVX10_2")
@@ -408,12 +398,12 @@
;; 128-, 256- and 512-bit float vector modes for bitwise operations
(define_mode_iterator VFB
- [(V32BF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX512F")
(V16BF "TARGET_AVX") (V8BF "TARGET_SSE2")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
+ (V32HF "TARGET_AVX512F")
(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; 128- and 256-bit float vector modes
@@ -430,44 +420,39 @@
;; All SFmode vector float modes
(define_mode_iterator VF1
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_AVX2
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
;; 128- and 256-bit SF vector modes
(define_mode_iterator VF1_128_256
[(V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_128_256VL
- [(V8SF "TARGET_EVEX512") (V4SF "TARGET_AVX512VL")])
+ [V8SF (V4SF "TARGET_AVX512VL")])
;; All DFmode vector float modes
(define_mode_iterator VF2
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
-
-(define_mode_iterator VF2_AVX10_2
- [(V8DF "TARGET_AVX10_2") V4DF V2DF])
+ [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All DFmode & HFmode & BFmode vector float modes
(define_mode_iterator VF2HB
- [(V32BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V8BF "TARGET_AVX10_2")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2")
+ (V8BF "TARGET_AVX10_2") (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; 128- and 256-bit DF vector modes
(define_mode_iterator VF2_128_256
[(V4DF "TARGET_AVX") V2DF])
(define_mode_iterator VF2_512_256
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") V4DF])
+ [(V8DF "TARGET_AVX512F") V4DF])
(define_mode_iterator VF2_512_256VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL")])
;; All 128bit vector SF/DF modes
(define_mode_iterator VF_128
@@ -484,116 +469,102 @@
;; All 512bit vector float modes
(define_mode_iterator VF_512
- [(V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SF V8DF])
;; All 512bit vector float modes for bitwise operations
(define_mode_iterator VFB_512
- [(V32BF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512")
- (V8DF "TARGET_EVEX512")])
+ [V32BF V32HF V16SF V8DF])
(define_mode_iterator V24F_128
[V4SF V8HF V8BF])
(define_mode_iterator VI48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI1248_AVX512VLBW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW")
(V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
(V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
-
-(define_mode_iterator VI1248_AVX10_2
- [(V64QI "TARGET_AVX10_2") V32QI V16QI
- (V32HI "TARGET_AVX10_2") V16HI V8HI
- (V16SI "TARGET_AVX10_2") V8SI V4SI
- (V8DI "TARGET_AVX10_2") V4DI V2DI])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VFH_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator V48_AVX512VL_4
+ [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL_4
+ [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
-(define_mode_iterator VFH_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF
- (V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
+(define_mode_iterator V8_AVX512VL_2
+ [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF2_AVX512VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512BW
- [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
-
-(define_mode_iterator VF1_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF])
+ [(V16SF "TARGET_AVX512BW") (V8SF "TARGET_AVX2") V4SF])
(define_mode_iterator VHFBF
- [(V32HF "TARGET_EVEX512") V16HF V8HF
- (V32BF "TARGET_EVEX512") V16BF V8BF])
+ [V32HF V16HF V8HF V32BF V16BF V8BF])
(define_mode_iterator VHFBF_256 [V16HF V16BF])
(define_mode_iterator VHFBF_128 [V8HF V8BF])
(define_mode_iterator VHF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
(define_mode_iterator VHFBF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-
-(define_mode_iterator VHF_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-(define_mode_iterator VBF_AVX10_2
- [(V32BF "TARGET_AVX10_2") V16BF V8BF])
+(define_mode_iterator VBF
+ [V32BF V16BF V8BF])
;; All vector integer modes
(define_mode_iterator VI
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
;; All vector integer and HF modes
(define_mode_iterator VIHFBF
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF])
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF])
(define_mode_iterator VI_AVX2
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; All QImode vector integer modes
(define_mode_iterator VI1
@@ -611,56 +582,50 @@
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
(define_mode_iterator VI8
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
-
-(define_mode_iterator VI8_AVX10_2
- [(V8DI "TARGET_AVX10_2") V4DI V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI8_FVL
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")])
+ [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_AVX512VL
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_256_512
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX2
[(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
(define_mode_iterator VI1_AVX512VNNI
- [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512VNNIBW
- [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI12_256_512_AVX512VL
- [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
+ [V64QI (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL")])
(define_mode_iterator VI2_AVX2
[(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX2_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512F
- [(V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512VNNIBW
- [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V16HI "TARGET_AVX2") V8HI])
-(define_mode_iterator VI2_AVX10_2
- [(V32HI "TARGET_AVX10_2") V16HI V8HI])
-
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -668,65 +633,64 @@
[(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator VI4_AVX10_2
[(V16SI "TARGET_AVX10_2") V8SI V4SI])
(define_mode_iterator VI48_AVX512F_AVX512VL
- [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512")
+ [V4SI V8SI (V16SI "TARGET_AVX512F")
(V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V8DI "TARGET_AVX512F")])
(define_mode_iterator VI2_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
(define_mode_iterator VI2HFBF_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") (V32HF "TARGET_EVEX512")
- (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") (V32BF "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") V32HF
+ (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") V32BF])
(define_mode_iterator VI2H_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8SI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8SI "TARGET_AVX512VL") V16SI V8DI])
(define_mode_iterator VI1_AVX512VL_F
- [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F && TARGET_EVEX512")])
+ [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
(define_mode_iterator VI8_AVX2_AVX512BW
- [(V8DI "TARGET_AVX512BW && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
(define_mode_iterator VI4_128_8_256
[V4SI V4DI])
;; All V8D* modes
(define_mode_iterator V8FI
- [(V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V8DF V8DI])
;; All V16S* modes
(define_mode_iterator V16FI
- [(V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [V16SF V16SI])
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2_AVX512BW
- [(V4TI "TARGET_AVX512BW && TARGET_EVEX512") (V2TI "TARGET_AVX2") V1TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Suppose TARGET_AVX512BW as baseline
(define_mode_iterator VIMAX_AVX512VL
- [(V4TI "TARGET_EVEX512") (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+ [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
(define_mode_iterator VIMAX_AVX2
[(V2TI "TARGET_AVX2") V1TI])
@@ -736,17 +700,17 @@
(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI12_AVX2_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI24_AVX2
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2
[(V32QI "TARGET_AVX2") V16QI
@@ -754,17 +718,17 @@
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI248_AVX512VL
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V32HI V16SI V8DI
(V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI248_AVX512VLBW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI48_AVX2
[(V8SI "TARGET_AVX2") V4SI
@@ -776,17 +740,16 @@
(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512BW && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW") V16SI V8DI])
(define_mode_iterator VI248_AVX512BW_AVX512VL
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW")
+ (V4DI "TARGET_AVX512VL") V16SI V8DI])
;; Suppose TARGET_AVX512VL as baseline
(define_mode_iterator VI248_AVX512BW_1
@@ -800,16 +763,16 @@
V4DI V2DI])
(define_mode_iterator VI48_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") V8SI V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI V2DI])
+ [(V16SI "TARGET_AVX512F") V8SI V4SI
+ (V8DI "TARGET_AVX512F") V4DI V2DI])
(define_mode_iterator VI48_AVX_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI12_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
(define_mode_iterator V48_128_256
[V4SF V2DF
@@ -950,10 +913,10 @@
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI248_256 [V16HI V8SI V4DI])
(define_mode_iterator VI248_512
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V32HI V16SI V8DI])
(define_mode_iterator VI48_128 [V4SI V2DI])
(define_mode_iterator VI148_512
- [(V64QI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V64QI V16SI V8DI])
(define_mode_iterator VI148_256 [V32QI V8SI V4DI])
(define_mode_iterator VI148_128 [V16QI V4SI V2DI])
@@ -961,75 +924,62 @@
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI124_256_AVX512F_AVX512BW
[V32QI V16HI V8SI
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
+ (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
(define_mode_iterator VI_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
(define_mode_iterator VIHFBF_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512")
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")])
;; Int-float size matches
(define_mode_iterator VI2F_256_512
- [V16HI (V32HI "TARGET_EVEX512")
- V16HF (V32HF "TARGET_EVEX512")
- V16BF (V32BF "TARGET_EVEX512")])
+ [V16HI V32HI V16HF V32HF V16BF V32BF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
(define_mode_iterator VI4F_256_512
- [V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")])
+ [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512
[V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_mode_iterator VF48H_AVX512VL
- [(V8DF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V8DF V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_iterator VF48_128
[V2DF V4SF])
(define_mode_iterator VI48F
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VI12_VI48F_AVX512VL
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
(define_mode_iterator V8_128 [V8HI V8HF V8BF])
(define_mode_iterator V16_256 [V16HI V16HF V16BF])
(define_mode_iterator V32_512
- [(V32HI "TARGET_EVEX512") (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V32HI V32HF V32BF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -1441,7 +1391,7 @@
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SI V16SF V8DF])
;; Mapping for dbpsabbw modes
(define_mode_attr dbpsadbwmode
@@ -1639,6 +1589,44 @@
"&& 1"
[(set (match_dup 0) (match_dup 1))])
+(define_insn_and_split "*<avx512>_load<mode>mask_and15"
+ [(set (match_operand:V48_AVX512VL_4 0 "register_operand" "=v")
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4
+ [(match_operand:V48_AVX512VL_4 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V48_AVX512VL_4 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 15))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
+(define_insn_and_split "*<avx512>_load<mode>mask_and3"
+ [(set (match_operand:V8_AVX512VL_2 0 "register_operand" "=v")
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2
+ [(match_operand:V8_AVX512VL_2 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V8_AVX512VL_2 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 3))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
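
The two splitters above absorb a redundant QImode AND on the mask: for 4- and 2-element modes only the low 4 or 2 mask bits select anything, so "k & 0xf" (or "& 3") adds no information. A sketch of a source shape assumed to produce that RTL, using only documented AVX-512VL intrinsics:

#include <immintrin.h>

/* gcc -O2 -mavx512vl -c load4.c  */
__m128i
load_low4 (__m128i src, __mmask8 k, const int *p)
{
  return _mm_mask_loadu_epi32 (src, k & 0xf, p);   /* the & 0xf is dropped */
}
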
(define_expand "<avx512>_load<mode>_mask"
[(set (match_operand:VI12_AVX512VL 0 "register_operand")
(vec_merge:VI12_AVX512VL
@@ -2049,11 +2037,9 @@
(define_mode_iterator STORENT_MODE
[(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
(define_expand "storent<mode>"
[(set (match_operand:STORENT_MODE 0 "memory_operand")
@@ -2857,10 +2843,10 @@
})
(define_expand "div<mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (div:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "vector_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (div:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "vector_operand")))]
"TARGET_AVX10_2"
{
if (TARGET_RECIP_VEC_DIV
@@ -3897,15 +3883,12 @@
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -3948,13 +3931,11 @@
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(smaxmin:REDUC_SMINMAX_MODE
@@ -4063,10 +4044,8 @@
(define_mode_iterator REDUC_ANY_LOGIC_MODE
[(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(any_logic:REDUC_ANY_LOGIC_MODE
@@ -4410,7 +4389,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
@@ -4428,7 +4407,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
UNSPEC_PCMP)))
(set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
(unspec:<V48H_AVX512VL:avx512fmaskmode>
@@ -4469,7 +4448,8 @@
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8
+ && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4480,6 +4460,70 @@
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_insn "*<avx512>_cmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v")
+ (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512vl_ucmpv2di3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimmediate_operand" "v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
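
Same idea for compares: a 4-element (or 2-element) vcmp already leaves the upper bits of the QImode mask zero, so an explicit mask with 15 (or 3) can be folded into the compare itself instead of forcing the 8-or-more-element path. A hedged sketch of a source shape assumed to match these patterns:

#include <immintrin.h>

/* gcc -O2 -mavx512vl -c lt4.c  */
__mmask8
lt4 (__m128i a, __m128i b)
{
  return _mm_cmplt_epi32_mask (a, b) & 0xf;   /* folded into the vpcmpd */
}
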
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -4762,7 +4806,8 @@
(match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_7_operand")]
UNSPEC_UNSIGNED_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && GET_MODE_NUNITS (<MODE>mode) >= 8"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4923,8 +4968,8 @@
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(match_operator:<avx512fmaskmode> 1 ""
- [(match_operand:VBF_AVX10_2 2 "register_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))]
+ [(match_operand:VBF 2 "register_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")]))]
"TARGET_AVX10_2"
{
bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -5142,7 +5187,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
+ (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
(match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5155,7 +5200,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_operand")
+ (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
(match_operand:VI_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
@@ -5168,7 +5213,7 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_operand")
+ (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
(match_operand:V1TI 2 "nonimm_or_0_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
@@ -5181,7 +5226,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimmediate_operand")
+ (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
(match_operand:VF_256 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5194,7 +5239,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_operand")
+ (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
(match_operand:VF_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
@@ -5573,7 +5618,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5630,7 +5675,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx_noavx512vl,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx_noavx512f,avx512vl,avx512f")
(set_attr "addr" "*,gpr16,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
@@ -5703,7 +5748,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5765,7 +5810,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -5811,15 +5856,10 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
- (V8BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V32BF "TARGET_AVX10_2")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (HF "TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32HF "TARGET_AVX512FP16")
+ (V8BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") (V32BF "TARGET_AVX10_2")])
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -5857,8 +5897,7 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
(define_mode_iterator FMAMODE
[SF DF V4SF V2DF V8SF V4DF])
@@ -5928,14 +5967,12 @@
;; Suppose AVX-512F as baseline
(define_mode_iterator VFH_SF_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(HF "TARGET_AVX512FP16")
- SF (V16SF "TARGET_EVEX512")
- (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- DF (V8DF "TARGET_EVEX512")
- (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
@@ -8683,7 +8720,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8751,7 +8788,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8761,7 +8798,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_fix:V16SI
(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9349,7 +9386,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9385,7 +9422,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9544,7 +9581,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9554,7 +9591,7 @@
[(set (match_operand:V8SI 0 "register_operand" "=v")
(any_fix:V8SI
(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10070,7 +10107,7 @@
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10232,7 +10269,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10438,7 +10475,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float_extend:V8DF
(match_dup 2)))]
-"TARGET_AVX512F && TARGET_EVEX512"
+"TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SFmode);")
(define_expand "vec_unpacks_lo_v4sf"
@@ -10576,7 +10613,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float:V8DF
(match_dup 2)))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SImode);")
(define_expand "vec_unpacks_float_lo_v16si"
@@ -10588,7 +10625,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
@@ -10684,7 +10721,7 @@
(define_expand "vec_unpacku_float_hi_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[4];
@@ -10733,7 +10770,7 @@
(define_expand "vec_unpacku_float_lo_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[3];
@@ -10827,7 +10864,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -10942,7 +10979,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -11135,7 +11172,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11223,7 +11260,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11363,7 +11400,7 @@
(const_int 11) (const_int 11)
(const_int 13) (const_int 13)
(const_int 15) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -11416,7 +11453,7 @@
(const_int 10) (const_int 10)
(const_int 12) (const_int 12)
(const_int 14) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -12376,9 +12413,7 @@
(V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")])
(define_mode_iterator AVX512_VEC
- [(V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
[(match_operand:<ssequartermode> 0 "nonimmediate_operand")
@@ -12547,9 +12582,7 @@
[(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
(define_mode_iterator AVX512_VEC_2
- [(V16SF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
@@ -13110,7 +13143,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
if (TARGET_AVX512VL
@@ -13159,7 +13192,7 @@
(const_int 58) (const_int 59)
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
@@ -13257,15 +13290,15 @@
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
(define_expand "vec_extract<mode><ssescalarmodelower>"
[(match_operand:<ssescalarmode> 0 "register_operand")
@@ -13307,7 +13340,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13421,9 +13454,9 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -13437,7 +13470,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13454,7 +13487,7 @@
(const_int 2) (const_int 6)])))]
"TARGET_AVX && <mask_avx512vl_condition>"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "V4DF")])
@@ -13649,7 +13682,7 @@
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
   /* Disallow embedded broadcast for vector HFmode since
      it's not a real AVX512FP16 instruction. */
&& (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
@@ -13731,7 +13764,7 @@
[(set (match_operand:V 0 "register_operand")
(match_operand:V 1 "ternlog_operand"))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -13761,7 +13794,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13846,7 +13879,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13930,7 +13963,7 @@
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -14080,7 +14113,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V16SF 4 "register_operand")
(match_operand:HI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
@@ -14267,7 +14300,7 @@
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_28_to_31_operand")
(match_operand 18 "const_28_to_31_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
&& INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
&& INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
@@ -14302,7 +14335,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V8DF 4 "register_operand")
(match_operand:QI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
@@ -14332,7 +14365,7 @@
(match_operand 8 "const_12_to_13_operand")
(match_operand 9 "const_6_to_7_operand")
(match_operand 10 "const_14_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask;
mask = INTVAL (operands[3]);
@@ -14464,7 +14497,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14514,7 +14547,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14880,7 +14913,7 @@
(set_attr "mode" "V2DF,DF,V8DF")
(set (attr "enabled")
(cond [(eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(match_test "<mask_avx512vl_condition>")
(const_string "*")
@@ -14965,13 +14998,13 @@
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
(truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
(any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -14993,7 +15026,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15018,7 +15051,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15102,7 +15135,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15118,7 +15151,7 @@
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
(match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15132,19 +15165,19 @@
(match_operand:<pmov_src_mode> 1 "register_operand"))
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "truncv32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand")
(truncate:V32QI
(match_operand:V32HI 1 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "avx512bw_<code>v32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
(any_truncate:V32QI
(match_operand:V32HI 1 "register_operand" "v,v")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15174,7 +15207,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512VBMI && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512VBMI && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15190,7 +15223,7 @@
(match_operand:V32HI 1 "register_operand" "v,v"))
(match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
(match_operand:SI 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15204,7 +15237,7 @@
(match_operand:V32HI 1 "register_operand"))
(match_dup 0)
(match_operand:SI 2 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_mode_iterator PMOV_DST_MODE_2
[V4SI V8HI (V16QI "TARGET_AVX512BW")])
@@ -16062,7 +16095,7 @@
[(set (match_operand:V8QI 0 "register_operand")
(truncate:V8QI
(match_operand:V8DI 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx op0 = gen_reg_rtx (V16QImode);
@@ -16082,7 +16115,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16092,7 +16125,7 @@
[(set (match_operand:V8QI 0 "memory_operand" "=m")
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand" "v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16104,7 +16137,7 @@
(subreg:DI
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand")) 0))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -16128,7 +16161,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16149,7 +16182,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16162,7 +16195,7 @@
(match_operand:V8DI 1 "register_operand" "v"))
(match_dup 0)
(match_operand:QI 2 "register_operand" "Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16174,7 +16207,7 @@
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand"))
(match_operand:QI 2 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
operands[0] = adjust_address_nv (operands[0], V8QImode, 0);
emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0],
@@ -16431,7 +16464,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
@@ -16451,7 +16484,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16547,7 +16580,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
@@ -16567,7 +16600,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16969,7 +17002,7 @@
"TARGET_SSE2"
{
/* Try with vnni instructions. */
- if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI && TARGET_EVEX512)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI)
|| (<MODE_SIZE> < 64
&& ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI)))
{
@@ -17062,7 +17095,7 @@
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "nonimmediate_operand")
(match_operand:V16SI 3 "nonimmediate_operand")]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
{
rtx t1 = gen_reg_rtx (V8DImode);
rtx t2 = gen_reg_rtx (V16SImode);
@@ -18300,13 +18333,10 @@
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
(V16HF "TARGET_AVX512FP16")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")
+ (V32HF "TARGET_AVX512FP16")])
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -18333,7 +18363,7 @@
{
operands[2] = CONSTM1_RTX (<MODE>mode);
- if (!TARGET_AVX512F || (!TARGET_AVX512VL && !TARGET_EVEX512))
+ if (!TARGET_AVX512F)
operands[2] = force_reg (<MODE>mode, operands[2]);
})
@@ -18342,7 +18372,6 @@
(xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br")
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))]
"TARGET_AVX512F
- && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)
&& (!<mask_applied>
|| <ssescalarmode>mode == SImode
|| <ssescalarmode>mode == DImode)"
@@ -18409,7 +18438,7 @@
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))
(unspec [(match_operand:VI 3 "register_operand" "0,0,0")]
UNSPEC_INSN_FALSE_DEP)]
- "TARGET_AVX512F && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)"
+ "TARGET_AVX512F"
{
if (TARGET_AVX512VL)
return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
@@ -18433,7 +18462,7 @@
(not:<ssescalarmode>
(match_operand:<ssescalarmode> 1 "nonimmediate_operand"))))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 0)
(xor:VI48_AVX512F
(vec_duplicate:VI48_AVX512F (match_dup 1))
@@ -18587,8 +18616,7 @@
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(eq_attr "alternative" "4")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
]
(const_string "*")))])
@@ -18632,7 +18660,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18647,7 +18675,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18941,7 +18969,7 @@
(match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr"))
(match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -18974,7 +19002,7 @@
(match_operand:VI 1 "bcst_vector_operand" "%0, 0")
(match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19005,7 +19033,7 @@
(not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0"))
(not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19027,7 +19055,7 @@
(const_string "*")))])
(define_mode_iterator AVX512ZEXTMASK
- [(DI "TARGET_AVX512BW && TARGET_EVEX512") (SI "TARGET_AVX512BW") HI])
+ [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
@@ -19276,7 +19304,7 @@
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19345,7 +19373,7 @@
(const_int 14) (const_int 15)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19407,7 +19435,7 @@
(const_int 61) (const_int 125)
(const_int 62) (const_int 126)
(const_int 63) (const_int 127)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19503,7 +19531,7 @@
(const_int 53) (const_int 117)
(const_int 54) (const_int 118)
(const_int 55) (const_int 119)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19727,7 +19755,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19782,7 +19810,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20488,7 +20516,7 @@
(match_operand:SI 2 "const_0_to_255_operand")
(match_operand:V16SI 3 "register_operand")
(match_operand:HI 4 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
@@ -20532,7 +20560,7 @@
(match_operand 15 "const_12_to_15_operand")
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_12_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& INTVAL (operands[2]) + 4 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 4 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 4 == INTVAL (operands[8])
@@ -20698,7 +20726,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFLW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20874,7 +20902,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFHW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -21408,7 +21436,7 @@
(match_operand:V4TI 1 "register_operand" "v")
(parallel
[(match_operand:SI 2 "const_0_to_3_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
@@ -21416,7 +21444,7 @@
(set_attr "mode" "XI")])
(define_mode_iterator VEXTRACTI128_MODE
- [(V4TI "TARGET_AVX512F && TARGET_EVEX512") V2TI])
+ [(V4TI "TARGET_AVX512F") V2TI])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand")
@@ -21439,7 +21467,7 @@
&& VECTOR_MODE_P (GET_MODE (operands[1]))
&& ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
|| (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
- || (TARGET_AVX512F && TARGET_EVEX512
+ || (TARGET_AVX512F
&& GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
&& (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
[(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
@@ -22814,7 +22842,7 @@
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix" "evex")
@@ -23328,10 +23356,10 @@
;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
;; modes for abs instruction on pre AVX-512 targets.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
@@ -24159,7 +24187,7 @@
[(set (match_operand:V32HI 0 "register_operand" "=v")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24173,7 +24201,7 @@
(match_operand:V64QI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24193,7 +24221,7 @@
(match_operand:V64QI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24206,7 +24234,7 @@
[(set (match_operand:V32HI 0 "register_operand")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
@@ -24354,7 +24382,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24364,7 +24392,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
@@ -24497,7 +24525,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24507,7 +24535,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
[(set (match_operand:V32HI 0 "register_operand" "=v")
@@ -24517,7 +24545,7 @@
(match_operand:V32HI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
@@ -24741,7 +24769,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24751,7 +24779,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8QI 1 "memory_operand" "m")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24769,7 +24797,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -24780,7 +24808,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
if (!MEM_P (operands[1]))
{
@@ -24922,7 +24950,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24932,7 +24960,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25059,7 +25087,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -25073,7 +25101,7 @@
(match_operand:V16SI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25092,7 +25120,7 @@
(match_operand:V16SI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25104,7 +25132,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4siv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25505,7 +25533,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V16SF 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_15_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx tmp = gen_reg_rtx (V16SFmode);
emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
@@ -26723,7 +26751,7 @@
(ashiftrt:V8DI
(match_operand:V8DI 1 "register_operand")
(match_operand:V8DI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv4di3"
[(set (match_operand:V4DI 0 "register_operand")
@@ -26814,7 +26842,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
(match_operand:V16SI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv8si3"
[(set (match_operand:V8SI 0 "register_operand")
@@ -27257,12 +27285,12 @@
(set_attr "mode" "OI")])
(define_mode_attr pbroadcast_evex_isa
- [(V64QI "avx512bw_512") (V32QI "avx512bw") (V16QI "avx512bw")
- (V32HI "avx512bw_512") (V16HI "avx512bw") (V8HI "avx512bw")
- (V16SI "avx512f_512") (V8SI "avx512f") (V4SI "avx512f")
- (V8DI "avx512f_512") (V4DI "avx512f") (V2DI "avx512f")
- (V32HF "avx512bw_512") (V16HF "avx512bw") (V8HF "avx512bw")
- (V32BF "avx512bw_512") (V16BF "avx512bw") (V8BF "avx512bw")])
+ [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+ (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+ (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+ (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")
+ (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")
+ (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")])
(define_insn "avx2_pbroadcast<mode>"
[(set (match_operand:VIHFBF 0 "register_operand" "=x,v")
@@ -27806,7 +27834,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv4si"
@@ -27834,7 +27862,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv2di"
@@ -27849,7 +27877,7 @@
%vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
[(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx")
- (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov")
+ (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov")
(set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig")
(set (attr "mode")
(cond [(and (eq_attr "alternative" "2")
@@ -27865,8 +27893,7 @@
(if_then_else
(eq_attr "alternative" "2")
(symbol_ref "TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
(const_string "*")))])
(define_insn "avx2_vbroadcasti128_<mode>"
@@ -27946,7 +27973,7 @@
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_evex")
- (set_attr "isa" "avx2,noavx2,avx2,avx512f_512,noavx2")
+ (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
(set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
(define_split
@@ -28010,8 +28037,8 @@
;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
(define_mode_iterator VI4F_BRCST32x2
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_attr 64x2mode
[(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
@@ -28061,8 +28088,7 @@
;; For broadcast[i|f]64x2
(define_mode_iterator VI8F_BRCST64x2
- [(V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
[(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
@@ -28118,27 +28144,26 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VPERMI2
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
(define_mode_iterator VPERMI2I
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V16SI V8DI
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
@@ -28813,29 +28838,28 @@
;; Modes handled by vec_init expanders.
(define_mode_iterator VEC_INIT_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Likewise, but for initialization from half sized vectors.
;; Thus, these are all VEC_INIT_MODE modes except V2??.
(define_mode_iterator VEC_INIT_HALF_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
+ (V4TI "TARGET_AVX512F")])
(define_expand "vec_init<mode><ssescalarmodelower>"
[(match_operand:VEC_INIT_MODE 0 "register_operand")
@@ -29096,7 +29120,7 @@
(unspec:V16SF
[(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTPH2PS))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29186,7 +29210,7 @@
UNSPEC_VCVTPS2PH)
(match_operand:V16HI 3 "nonimm_or_0_operand")
(match_operand:HI 4 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int round = INTVAL (operands[2]);
/* Separate {sae} from rounding control imm,
@@ -29205,7 +29229,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29217,7 +29241,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -30196,7 +30220,7 @@
(match_operand:V8DI 2 "register_operand" "v")
(match_operand:V8DI 3 "nonimmediate_operand" "vm")]
VPMADD52))]
- "TARGET_AVX512IFMA && TARGET_EVEX512"
+ "TARGET_AVX512IFMA"
"vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "prefix" "evex")
@@ -30567,7 +30591,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30636,7 +30660,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30705,7 +30729,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30774,7 +30798,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30930,8 +30954,7 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VI48_AVX512VP2VL
- [(V8DI "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator MASK_DWI [P2QI P2HI])
@@ -30973,12 +30996,12 @@
(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
(match_operand:V16SI 2 "vector_operand" "vm")]
UNSPEC_VP2INTERSECT))]
- "TARGET_AVX512VP2INTERSECT && TARGET_EVEX512"
+ "TARGET_AVX512VP2INTERSECT"
"vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr ("prefix") ("evex"))])
(define_mode_iterator VF_AVX512BF16VL
- [(V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
[(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")])
@@ -31098,7 +31121,7 @@
"vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
(define_mode_iterator VF1_AVX512_256
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL")])
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
[(match_operand:<sf_cvt_bf16> 0 "register_operand")
@@ -31144,7 +31167,7 @@
[(set (match_operand:V16BF 0 "register_operand")
(float_truncate:V16BF
(match_operand:V16SF 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512
+ "TARGET_AVX512BW
&& !HONOR_NANS (BFmode) && !flag_rounding_math
&& (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
{
@@ -31428,10 +31451,10 @@
;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
(define_mode_iterator INT_BROADCAST_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512 && TARGET_64BIT")
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F && TARGET_64BIT")
(V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
;; Broadcast from an integer. NB: Enable broadcast only if we can move
@@ -31705,8 +31728,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (vec_concat:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (vec_concat:VHF_AVX512VL
(float_truncate:<ssehalfvecmode>
(match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>"))
(float_truncate:<ssehalfvecmode>
@@ -31730,8 +31753,8 @@
(define_insn "vcvt<convertfp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode> 0 "register_operand" "=v")
(unspec:<ssebvecmode>
- [(match_operand:VHF_AVX10_2 1 "register_operand" "v")
- (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VHF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_CONVERTFP8_PACK))]
"TARGET_AVX10_2"
"vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand2>, %1, %2}"
@@ -31814,7 +31837,7 @@
[(set_attr "prefix" "evex")])
(define_mode_iterator VHF_AVX10_2_2
- [(V32HF "TARGET_AVX10_2") V16HF])
+ [V32HF V16HF])
(define_insn "vcvt<biasph2fp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v")
@@ -31911,8 +31934,8 @@
[(set_attr "prefix" "evex")])
(define_insn "vcvthf82ph<mode><mask_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (unspec:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VHF_AVX512VL
[(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")]
UNSPEC_VCVTHF82PH))]
"TARGET_AVX10_2"
@@ -31934,8 +31957,8 @@
(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -31952,8 +31975,8 @@
(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -32032,23 +32055,23 @@
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>_mask"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -32057,10 +32080,10 @@
[(set_attr "prefix" "evex")])
(define_expand "vdpphps_<mode>_maskz"
- [(match_operand:VF1_AVX10_2 0 "register_operand")
- (match_operand:VF1_AVX10_2 1 "register_operand")
- (match_operand:VF1_AVX10_2 2 "register_operand")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VF1_AVX512VL 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:VF1_AVX512VL 2 "register_operand")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32070,60 +32093,60 @@
})
(define_insn "vdpphps_<mode>_maskz_1"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
- (match_operand:VF1_AVX10_2 4 "const0_operand" "C")
+ (match_operand:VF1_AVX512VL 4 "const0_operand" "C")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_scalefbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")]
UNSPEC_VSCALEFBF16))]
"TARGET_AVX10_2"
"vscalefbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "<code><mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")))]
"TARGET_AVX10_2")
(define_insn "avx10_2_<code>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_<insn>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (plusminusmultdiv:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (plusminusmultdiv:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "avx10_2_fmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32135,11 +32158,11 @@
})
(define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32150,12 +32173,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32167,12 +32190,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32182,10 +32205,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32197,12 +32220,12 @@
})
(define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32213,13 +32236,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32231,13 +32254,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32247,10 +32270,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32262,12 +32285,12 @@
})
(define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32278,13 +32301,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32296,13 +32319,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32312,10 +32335,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32327,13 +32350,13 @@
})
(define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32344,14 +32367,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32363,14 +32386,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32380,35 +32403,35 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_rsqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT))]
"TARGET_AVX10_2"
"vrsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_sqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (sqrt:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (sqrt:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"vsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_rcpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP))]
"TARGET_AVX10_2"
"vrcpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_getexpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_GETEXP))]
"TARGET_AVX10_2"
"vgetexpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32425,9 +32448,9 @@
(UNSPEC_VGETMANTBF16 "getmant")])
(define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
BF16IMMOP))]
"TARGET_AVX10_2"
@@ -32437,7 +32460,7 @@
(define_insn "avx10_2_fpclassbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand 2 "const_0_to_255_operand")]
UNSPEC_VFPCLASSBF16))]
"TARGET_AVX10_2"
@@ -32447,8 +32470,8 @@
(define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
(match_operand 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX10_2"
@@ -32486,7 +32509,7 @@
(define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs<mode><mask_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")]
+ [(match_operand:VBF 1 "vector_operand" "vm")]
UNSPEC_CVT_BF16_IBS_ITER))]
"TARGET_AVX10_2"
"vcvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32501,7 +32524,7 @@
(define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32516,7 +32539,7 @@
(define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32531,7 +32554,7 @@
(define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32546,7 +32569,7 @@
(define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32565,7 +32588,7 @@
(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXSI>
- [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32576,7 +32599,7 @@
(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXDI>
- [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32585,8 +32608,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v")
- (unspec:VI8_AVX10_2
+ [(set (match_operand:VI8 0 "register_operand" "=v")
+ (unspec:VI8
[(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
@@ -32622,10 +32645,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_minmaxbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "bcst_vector_operand" "vmBr")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAXBF16))]
"TARGET_AVX10_2"
@@ -32634,10 +32657,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v")
- (unspec:VFH_AVX10_2
- [(match_operand:VFH_AVX10_2 1 "register_operand" "v")
- (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
+ (unspec:VFH_AVX512VL
+ [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ (match_operand:VFH_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAX))]
"TARGET_AVX10_2"
@@ -32661,9 +32684,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>"
- [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v")
- (unspec:VI1248_AVX10_2
- [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")]
+ [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand" "=v")
+ (unspec:VI1248_AVX512VLBW
+ [(match_operand:VI1248_AVX512VLBW 1 "memory_operand" "m")]
UNSPEC_VMOVRS))]
"TARGET_AVX10_2 && TARGET_MOVRS"
"vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 15d8e96..64f3c20 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B)
#endif /* __DISABLE_VAES__ */
-#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VAES__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vaes,avx512f,evex512")
+#pragma GCC target("vaes,avx512f")
#define __DISABLE_VAESF__
#endif /* __VAES__ */
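Dropping the evex512 requirement follows the same pattern throughout these headers: the guard only has to establish the features the intrinsics really need. A minimal sketch of the full push/define/pop idiom these headers use, with the restore half (not visible in this hunk) filled in from the usual convention:

  #if !defined(__VAES__) || !defined(__AVX512F__)
  #pragma GCC push_options
  #pragma GCC target("vaes,avx512f")
  #define __DISABLE_VAESF__
  #endif

  /* ...512-bit VAES intrinsics are defined here...  */

  #ifdef __DISABLE_VAESF__
  #undef __DISABLE_VAESF__
  #pragma GCC pop_options
  #endif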
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index 2b36c37..a02ab38 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _VPCLMULQDQINTRIN_H_INCLUDED
#define _VPCLMULQDQINTRIN_H_INCLUDED
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx512f,evex512")
+#pragma GCC target("vpclmulqdq,avx512f")
#define __DISABLE_VPCLMULQDQF__
#endif /* __VPCLMULQDQF__ */
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb73..b08081e 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
in 128bit, 256bit and 512bit */
4, 4, 6, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */
COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -121,16 +122,24 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
+
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
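Every table in this file is a positional aggregate initializer of struct processor_costs, so each processor entry below has to gain the same seven conversion slots in the same order. A small reference sketch of the two cost macros involved, quoted from memory and therefore to be double-checked against rtl.h and the top of this file:

  /* Assumed definitions: COSTS_N_INSNS is from gcc/rtl.h, COSTS_N_BYTES
     is the size-tuning variant defined near the top of x86-tune-costs.h.
     Cost units are quarter-instructions.  */
  #define COSTS_N_INSNS(N) ((N) * 4)
  #define COSTS_N_BYTES(N) ((N) * 2)

So the new -Os entry COSTS_N_BYTES (4) models a conversion as a 4-byte instruction, while a latency table entry such as COSTS_N_INSNS (6) models a 6-cycle one.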
@@ -219,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -243,6 +253,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -330,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
@@ -356,6 +374,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -443,6 +468,7 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -467,6 +493,13 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -547,6 +580,7 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -571,6 +605,13 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -666,6 +707,7 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -690,6 +732,13 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -776,6 +825,7 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -800,6 +850,13 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -886,6 +943,7 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -913,6 +971,13 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1002,6 +1067,7 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1027,6 +1093,13 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1120,6 +1193,7 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1150,6 +1224,13 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1251,6 +1332,7 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1281,6 +1363,13 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1374,6 +1463,7 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
16, /* cost of moving SSE register to integer. */
+ 16, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
@@ -1405,6 +1495,13 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1518,6 +1615,7 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1553,6 +1651,14 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
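A quick arithmetic check of the split-register doubling applied above, as a standalone C snippet (the COSTS_N_INSNS definition is assumed from rtl.h):

  #include <assert.h>
  #define COSTS_N_INSNS(N) ((N) * 4)   /* assumed, as in gcc/rtl.h */

  int
  main (void)
  {
    /* znver1 executes wide vector ops as 128-bit halves, so each
       doubling of vector width doubles the conversion cost above.  */
    int cvt_128 = COSTS_N_INSNS (3);
    int cvt_256 = COSTS_N_INSNS (6);
    int cvt_512 = COSTS_N_INSNS (12);
    assert (cvt_256 == 2 * cvt_128);
    assert (cvt_512 == 2 * cvt_256);
    return 0;
  }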
@@ -1677,6 +1783,7 @@ struct processor_costs znver2_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1712,6 +1819,13 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -1812,6 +1926,7 @@ struct processor_costs znver3_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
throughput 9. Approx 7 uops do not depend on vector size and every load
is 4 uops. */
@@ -1847,6 +1962,13 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -1949,6 +2071,7 @@ struct processor_costs znver4_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
throughput 5. Approx 7 uops do not depend on vector size and every load
is 5 uops. */
@@ -1984,6 +2107,14 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -2089,6 +2220,7 @@ struct processor_costs znver5_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* TODO: gather and scatter instructions are currently disabled in
x86-tune.def. In some cases they are however a win, see PR116582
@@ -2135,6 +2267,13 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
/* DIVSD has throughput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2250,6 +2389,7 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2274,6 +2414,13 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2379,6 +2526,7 @@ struct processor_costs icelake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2403,6 +2551,13 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2502,6 +2657,7 @@ struct processor_costs alderlake_cost = {
{8, 8, 8, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2526,6 +2682,13 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2618,6 +2781,7 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2642,6 +2806,13 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2731,6 +2902,7 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2755,6 +2927,13 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2843,6 +3022,7 @@ struct processor_costs pentium4_cost = {
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2867,6 +3047,13 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -2958,6 +3145,7 @@ struct processor_costs nocona_cost = {
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2982,6 +3170,13 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3071,6 +3266,7 @@ struct processor_costs atom_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3095,6 +3291,13 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3184,6 +3387,7 @@ struct processor_costs slm_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3208,6 +3412,13 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3309,6 +3520,7 @@ struct processor_costs tremont_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3335,6 +3547,13 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3424,6 +3643,7 @@ struct processor_costs intel_cost = {
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
6, 6, /* Gather load static, per_elt. */
6, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3448,6 +3668,13 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3532,15 +3759,16 @@ struct processor_costs lujiazui_cost = {
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers. */
+ {6, 6, 6}, /* cost of storing integer registers. */
{6, 6, 6, 10, 15}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
- 6, /* cost of moving SSE register to integer. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3566,6 +3794,13 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3658,6 +3893,7 @@ struct processor_costs yongfeng_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3682,6 +3918,13 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3774,6 +4017,7 @@ struct processor_costs shijidadao_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3798,6 +4042,13 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -3896,6 +4147,7 @@ struct processor_costs generic_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3922,6 +4174,13 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4022,6 +4281,7 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2, /* cost of moving SSE register to integer. */
+ 2, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPS is 9 uops,
rec. throughput 6.
So 5 uops statically and one uops per load. */
@@ -4051,6 +4311,13 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index 685a83c..61b1a26 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -80,7 +80,16 @@ ix86_issue_rate (void)
case PROCESSOR_ALDERLAKE:
case PROCESSOR_YONGFENG:
case PROCESSOR_SHIJIDADAO:
+ case PROCESSOR_SIERRAFOREST:
case PROCESSOR_GENERIC:
+ /* For znver5 the decoder can handle 4 or 8 instructions per cycle,
+ the op cache 12 instructions/cycle, dispatch 8 instructions,
+ integer rename 8 instructions and FP 6 instructions.
+
+ The scheduler, without understanding the out-of-order nature of the CPU,
+ is not going to be able to use more than 4 instructions since that
+ is the limit of the decoders. */
+ case PROCESSOR_ZNVER5:
return 4;
case PROCESSOR_ICELAKE_CLIENT:
@@ -91,13 +100,14 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
- /* For znver5 decoder can handle 4 or 8 instructions per cycle,
- op cache 12 instruction/cycle, dispatch 8 instructions
- integer rename 8 instructions and Fp 6 instructions.
-
- The scheduler, without understanding out of order nature of the CPU
- is unlikely going to be able to fill all of these. */
- case PROCESSOR_ZNVER5:
+ case PROCESSOR_GRANITERAPIDS:
+ case PROCESSOR_GRANITERAPIDS_D:
+ case PROCESSOR_DIAMONDRAPIDS:
+ case PROCESSOR_GRANDRIDGE:
+ case PROCESSOR_CLEARWATERFOREST:
+ case PROCESSOR_ARROWLAKE:
+ case PROCESSOR_ARROWLAKE_S:
+ case PROCESSOR_PANTHERLAKE:
return 6;
default:
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c857e76..91cdca7 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -87,9 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
several insns to break false dependency on the dest register for GLC
micro-architecture. */
DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC,
- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS
- | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID
- | m_CORE_ATOM)
+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE)
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
@@ -574,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
"sse_movcc_use_blendv", ~m_CORE_ATOM)
+/* X86_TUNE_SSE_REDUCTION_PREFER_PSHUF: Prefer pshuf to reduce V16QI,
+ V8HI, V4SI, V4SF and V2DI modes when lshr is costlier. */
+DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF,
+ "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index 91b7310..8dab927 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -1241,8 +1241,7 @@ ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
LCT_CONST, Pmode,
tga_op1, Pmode, tga_op2, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
if (GET_MODE (op0) != Pmode)
op0 = tga_ret;
@@ -1265,8 +1264,7 @@ ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
LCT_CONST, Pmode,
tga_op1, Pmode, tga_op2, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_LD_BASE);
@@ -1879,8 +1877,7 @@ ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
ret, const0_rtx)));
- rtx_insn *insns = get_insns ();
- end_sequence ();
+ rtx_insn *insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, BImode, *op0, *op1));
@@ -3174,8 +3171,7 @@ spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
spill_fill_data.init_reg[iter],
disp_rtx));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
}
/* Careful for being the first insn in a sequence. */
@@ -11711,8 +11707,7 @@ expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
this *will* succeed. For V8QImode or V2SImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
if (d->testing_p)
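
A recurring change in this patch, seen in the ia64 hunks above and repeated for most back ends below, collapses the get_insns ()/end_sequence () pair into a single call. A minimal sketch of the two idioms, assuming a GCC-internal context where 'dest' and 'src' are hypothetical rtx values already set up:

    /* Old idiom: fetch the collected insns, then pop the sequence.  */
    start_sequence ();
    emit_move_insn (dest, src);
    rtx_insn *seq = get_insns ();
    end_sequence ();

    /* New idiom assumed by this patch: end_sequence () itself returns the
       collected insns, so one call replaces the pair.  */
    start_sequence ();
    emit_move_insn (dest, src);
    rtx_insn *seq2 = end_sequence ();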
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 7533e53..f62e416 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2948,9 +2948,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
diff --git a/gcc/config/m32r/m32r.cc b/gcc/config/m32r/m32r.cc
index a96634c..75db280 100644
--- a/gcc/config/m32r/m32r.cc
+++ b/gcc/config/m32r/m32r.cc
@@ -1150,8 +1150,7 @@ gen_split_move_double (rtx operands[])
else
gcc_unreachable ();
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md
index 309d895..393e0da 100644
--- a/gcc/config/m32r/m32r.md
+++ b/gcc/config/m32r/m32r.md
@@ -1554,8 +1554,7 @@
start_sequence ();
emit_insn (gen_cmp_ltusi_insn (op1, const1_rtx));
emit_insn (gen_movcc_insn (op0));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
(define_insn "seq_insn"
@@ -1607,8 +1606,7 @@
emit_insn (gen_cmp_ltusi_insn (op3, const1_rtx));
emit_insn (gen_movcc_insn (op0));
- operands[4] = get_insns ();
- end_sequence ();
+ operands[4] = end_sequence ();
}")
(define_insn "sne_zero_insn"
diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc
index d8fa6e0..800a385 100644
--- a/gcc/config/m68k/m68k.cc
+++ b/gcc/config/m68k/m68k.cc
@@ -2763,8 +2763,7 @@ m68k_call_tls_get_addr (rtx x, rtx eqv, enum m68k_reloc reloc)
Pmode, x, Pmode);
m68k_libcall_value_in_a0_p = false;
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
gcc_assert (can_create_pseudo_p ());
dest = gen_reg_rtx (Pmode);
@@ -2811,8 +2810,7 @@ m68k_call_m68k_read_tp (void)
a0 = emit_library_call_value (m68k_get_m68k_read_tp (), NULL_RTX, LCT_PURE,
Pmode);
m68k_libcall_value_in_a0_p = false;
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
/* Attach a unique REG_EQUIV, to allow the RTL optimizers to
share the m68k_read_tp result with other IE/LE model accesses. */
@@ -6799,8 +6797,7 @@ m68k_sched_md_init_global (FILE *sched_dump ATTRIBUTE_UNUSED,
start_sequence ();
emit_insn (gen_ib ());
- sched_ib.insn = get_insns ();
- end_sequence ();
+ sched_ib.insn = end_sequence ();
}
/* Scheduling pass is now finished. Free/reset static variables. */
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 35bf1c6..c96937f 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -4098,8 +4098,7 @@
emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
operand_subword_force (operands[1], 1, DFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4132,8 +4131,7 @@
emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
operand_subword_force (operands[1], 2, XFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4251,8 +4249,7 @@
emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
operand_subword_force (operands[1], 1, DFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4285,8 +4282,7 @@
emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
operand_subword_force (operands[1], 2, XFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
diff --git a/gcc/config/mcore/mcore.cc b/gcc/config/mcore/mcore.cc
index cd5f2c5..c4fc145 100644
--- a/gcc/config/mcore/mcore.cc
+++ b/gcc/config/mcore/mcore.cc
@@ -2984,9 +2984,7 @@ mcore_mark_dllimport (tree decl)
/* ??? At least I think that's why we do this. */
idp = get_identifier (newname);
- newrtl = gen_rtx_MEM (Pmode,
- gen_rtx_SYMBOL_REF (Pmode,
- IDENTIFIER_POINTER (idp)));
+ newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
XEXP (DECL_RTL (decl), 0) = newrtl;
}
diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc
index fc223fb..2ab5ada 100644
--- a/gcc/config/microblaze/microblaze.cc
+++ b/gcc/config/microblaze/microblaze.cc
@@ -239,6 +239,10 @@ section *sdata2_section;
#define TARGET_HAVE_TLS true
#endif
+/* MicroBlaze does not do speculative execution. */
+#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
/* Return truth value if a CONST_DOUBLE is ok to be a legitimate constant. */
static bool
microblaze_const_double_ok (rtx op, machine_mode mode)
@@ -585,8 +589,7 @@ microblaze_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
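
The TARGET_HAVE_SPECULATION_SAFE_VALUE definition added above tells the middle end that MicroBlaze needs no speculation barrier. A hedged illustration of what that means for user code (function and variable names are made up):

    /* With speculation_safe_value_not_needed, this builtin should compile to
       a plain copy of p[i]; no barrier is emitted and no "missing speculation
       barrier" warning is issued, because the core never speculates.  */
    int
    load_guarded (const int *p, int i, int n)
    {
      if (i < n)
        return __builtin_speculation_safe_value (p[i]);
      return 0;
    }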
diff --git a/gcc/config/mingw/mingw32.h b/gcc/config/mingw/mingw32.h
index 10bcd29..be2461f 100644
--- a/gcc/config/mingw/mingw32.h
+++ b/gcc/config/mingw/mingw32.h
@@ -308,6 +308,15 @@ do { \
#undef TARGET_N_FORMAT_TYPES
#define TARGET_N_FORMAT_TYPES 3
+#undef TARGET_WIN32_TLS
+#define TARGET_WIN32_TLS 1
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION mingw_pe_select_section
+
+#undef DEFAULT_TLS_SEG_REG
+#define DEFAULT_TLS_SEG_REG (TARGET_64BIT ? ADDR_SPACE_SEG_GS : ADDR_SPACE_SEG_FS)
+
#define HAVE_ENABLE_EXECUTE_STACK
#undef CHECK_EXECUTE_STACK_ENABLED
#define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 08a761d..f224966 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -391,6 +391,15 @@ i386_pe_strip_name_encoding_full (const char *str)
return name;
}
+section *
+mingw_pe_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
+ return get_named_section (decl, ".tls$", reloc);
+ else
+ return default_select_section (decl, reloc, align);
+}
+
void
mingw_pe_unique_section (tree decl, int reloc)
{
@@ -415,6 +424,8 @@ mingw_pe_unique_section (tree decl, int reloc)
prefix = ".text$";
else if (decl_readonly_section (decl, reloc))
prefix = ".rdata$";
+ else if (DECL_THREAD_LOCAL_P (decl))
+ prefix = ".tls$";
else
prefix = ".data$";
len = strlen (name) + strlen (prefix);
@@ -489,6 +500,9 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags,
*f++ = 'e';
#endif
+ if (strcmp (name, ".tls$") == 0)
+ *f++ = 'd';
+
if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0)
/* readonly data */
{
diff --git a/gcc/config/mingw/winnt.h b/gcc/config/mingw/winnt.h
index fa2d6c0..23f4dc9 100644
--- a/gcc/config/mingw/winnt.h
+++ b/gcc/config/mingw/winnt.h
@@ -31,6 +31,7 @@ extern void mingw_pe_file_end (void);
extern void mingw_pe_maybe_record_exported_symbol (tree, const char *, int);
extern void mingw_pe_record_stub (const char *, bool);
extern unsigned int mingw_pe_section_type_flags (tree, const char *, int);
+extern section *mingw_pe_select_section (tree, int, unsigned HOST_WIDE_INT);
extern void mingw_pe_unique_section (tree, int);
extern bool mingw_pe_valid_dllimport_attribute_p (const_tree);
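
Taken together, the mingw changes above (TARGET_WIN32_TLS, DEFAULT_TLS_SEG_REG, the ".tls$" section selection and the extra 'd' section flag) switch thread-local variables over to native Windows TLS. A rough sketch of the user-visible effect (identifiers are illustrative, exact code generation is not guaranteed):

    /* Expected to be placed in a ".tls$" section and, per DEFAULT_TLS_SEG_REG,
       addressed relative to %gs (64-bit) or %fs (32-bit) rather than going
       through the emutls runtime.  */
    __thread int counter;

    int
    bump (void)
    {
      return ++counter;
    }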
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..81eaa3c 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -3621,9 +3621,7 @@ mips_call_tls_get_addr (rtx sym, enum mips_symbol_type type, rtx v0)
const0_rtx, NULL_RTX, false);
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
@@ -15167,23 +15165,19 @@ mips_ls2_init_dfa_post_cycle_insn (void)
{
start_sequence ();
emit_insn (gen_ls2_alu1_turn_enabled_insn ());
- mips_ls2.alu1_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.alu1_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_alu2_turn_enabled_insn ());
- mips_ls2.alu2_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.alu2_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_falu1_turn_enabled_insn ());
- mips_ls2.falu1_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.falu1_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_falu2_turn_enabled_insn ());
- mips_ls2.falu2_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.falu2_turn_enabled_insn = end_sequence ();
mips_ls2.alu1_core_unit_code = get_cpu_unit_code ("ls2_alu1_core");
mips_ls2.alu2_core_unit_code = get_cpu_unit_code ("ls2_alu2_core");
@@ -19892,8 +19886,7 @@ mips16_split_long_branches (void)
emit_label (new_label);
}
- jump_sequence = get_insns ();
- end_sequence ();
+ jump_sequence = end_sequence ();
emit_insn_after (jump_sequence, jump_insn);
if (new_label)
diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
index e05dce1..85acea3 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -333,30 +333,31 @@
""
{
rtx system_reg = NULL_RTX;
+ rtx shift_amt = NULL_RTX;
/* Set system register form nds32_intrinsic_register_names[]. */
if ((INTVAL (operands[1]) >= NDS32_INT_H0)
&& (INTVAL (operands[1]) <= NDS32_INT_H15))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
- operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+ shift_amt = GEN_INT (31 - INTVAL (operands[1]));
}
else if (INTVAL (operands[1]) == NDS32_INT_SWI)
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
- operands[2] = GEN_INT (15);
+ shift_amt = GEN_INT (15);
}
else if ((INTVAL (operands[1]) >= NDS32_INT_H16)
&& (INTVAL (operands[1]) <= NDS32_INT_H31))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
- operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+ shift_amt = GEN_INT (31 - INTVAL (operands[1]));
}
else if ((INTVAL (operands[1]) >= NDS32_INT_H32)
&& (INTVAL (operands[1]) <= NDS32_INT_H63))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
- operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32));
+ shift_amt = GEN_INT (31 - (INTVAL (operands[1]) - 32));
}
else
error ("%<get_pending_int%> not support %<NDS32_INT_ALZ%>,"
@@ -366,7 +367,7 @@
if (system_reg != NULL_RTX)
{
emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
- emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2]));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], shift_amt));
emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
emit_insn (gen_unspec_dsb ());
}
diff --git a/gcc/config/nvptx/gen-multilib-matches-tests b/gcc/config/nvptx/gen-multilib-matches-tests
index a07f19a..fbfae88 100644
--- a/gcc/config/nvptx/gen-multilib-matches-tests
+++ b/gcc/config/nvptx/gen-multilib-matches-tests
@@ -18,6 +18,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -32,14 +33,15 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
CMMC
-BEGIN '--with-arch=sm_30', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_53,sm_70,sm_75,sm_80,sm_89'
+BEGIN '--with-arch=sm_30', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_53,sm_61,sm_70,sm_75,sm_80,sm_89'
SMOID sm_30
-SMOIL sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80 sm_89
+SMOIL sm_30 sm_35 sm_37 sm_52 sm_53 sm_61 sm_70 sm_75 sm_80 sm_89
AEMM .=misa?sm_30
CMMC
@@ -52,6 +54,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -65,6 +68,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -79,6 +83,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -93,6 +98,7 @@ AEMM misa?sm_35=misa?sm_30
AEMM misa?sm_35=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM misa?sm_75=misa?sm_80
AEMM misa?sm_75=misa?sm_89
@@ -106,6 +112,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -119,19 +126,55 @@ AEMM misa?sm_37=misa?sm_30
AEMM misa?sm_37=misa?sm_35
AEMM misa?sm_37=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
AEMM .=misa?sm_89
CMMC
-BEGIN '--with-arch=sm_53', '--with-multilib-list=sm_53=sm_30,sm_35,sm_37,sm_52,sm_70,sm_75,sm_80,sm_89'
+BEGIN '--with-arch=sm_53', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_61,sm_70,sm_75,sm_80,sm_89'
SMOID sm_53
-SMOIL sm_53 sm_30 sm_35 sm_37 sm_52 sm_70 sm_75 sm_80 sm_89
+SMOIL sm_53 sm_30 sm_35 sm_37 sm_52 sm_61 sm_70 sm_75 sm_80 sm_89
AEMM .=misa?sm_53
CMMC
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_61,sm_30'
+SMOID sm_61
+SMOIL sm_61 sm_30
+AEMM misa?sm_30=misa?sm_35
+AEMM misa?sm_30=misa?sm_37
+AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_30=misa?sm_53
+AEMM .=misa?sm_61
+AEMM .=misa?sm_70
+AEMM .=misa?sm_75
+AEMM .=misa?sm_80
+AEMM .=misa?sm_89
+CMMC
+
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_61,sm_37'
+SMOID sm_61
+SMOIL sm_61 sm_37
+AEMM misa?sm_37=misa?sm_30
+AEMM misa?sm_37=misa?sm_35
+AEMM misa?sm_37=misa?sm_52
+AEMM misa?sm_37=misa?sm_53
+AEMM .=misa?sm_61
+AEMM .=misa?sm_70
+AEMM .=misa?sm_75
+AEMM .=misa?sm_80
+AEMM .=misa?sm_89
+CMMC
+
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_61,sm_70,sm_75,sm_80,sm_89'
+SMOID sm_61
+SMOIL sm_61 sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80 sm_89
+AEMM .=misa?sm_61
+CMMC
+
+
BEGIN '--with-arch=sm_70', '--with-multilib-list=sm_70'
SMOID sm_70
SMOIL sm_70
@@ -140,6 +183,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -153,6 +197,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -166,6 +211,7 @@ AEMM misa?sm_53=misa?sm_30
AEMM misa?sm_53=misa?sm_35
AEMM misa?sm_53=misa?sm_37
AEMM misa?sm_53=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -178,6 +224,7 @@ SMOIL sm_70 sm_53 sm_30
AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -192,6 +239,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM misa?sm_30=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -205,6 +253,7 @@ AEMM misa?sm_53=misa?sm_30
AEMM misa?sm_53=misa?sm_35
AEMM misa?sm_53=misa?sm_37
AEMM misa?sm_53=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM misa?sm_53=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -217,6 +266,7 @@ SMOIL sm_75 sm_30 sm_53
AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM misa?sm_53=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -232,6 +282,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -245,6 +296,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM misa?sm_30=misa?sm_70
AEMM misa?sm_30=misa?sm_75
AEMM .=misa?sm_80
@@ -259,6 +311,7 @@ AEMM misa?sm_75=misa?sm_35
AEMM misa?sm_75=misa?sm_37
AEMM misa?sm_75=misa?sm_52
AEMM misa?sm_75=misa?sm_53
+AEMM misa?sm_75=misa?sm_61
AEMM misa?sm_75=misa?sm_70
AEMM .=misa?sm_80
AEMM .=misa?sm_89
@@ -273,6 +326,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -286,6 +340,7 @@ AEMM misa?sm_52=misa?sm_30
AEMM misa?sm_52=misa?sm_35
AEMM misa?sm_52=misa?sm_37
AEMM misa?sm_52=misa?sm_53
+AEMM misa?sm_52=misa?sm_61
AEMM misa?sm_52=misa?sm_70
AEMM misa?sm_52=misa?sm_75
AEMM misa?sm_52=misa?sm_80
@@ -293,8 +348,8 @@ AEMM .=misa?sm_89
CMMC
-BEGIN '--with-arch=sm_89', '--with-multilib-list=sm_89,sm_30,sm_35,sm_37,sm_52,sm_53,sm_70,sm_75,sm_80'
+BEGIN '--with-arch=sm_89', '--with-multilib-list=sm_89,sm_30,sm_35,sm_37,sm_52,sm_53,sm_61,sm_70,sm_75,sm_80'
SMOID sm_89
-SMOIL sm_89 sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80
+SMOIL sm_89 sm_30 sm_35 sm_37 sm_52 sm_53 sm_61 sm_70 sm_75 sm_80
AEMM .=misa?sm_89
CMMC
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index bdfe7f5..bb3f0fc 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -260,8 +260,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
unsigned ix;
const char *sm_ver = NULL, *version = NULL;
const char *sm_ver2 = NULL, *version2 = NULL;
- size_t file_cnt = 0;
- size_t *file_idx = XALLOCAVEC (size_t, len);
+ /* To reduce the number of reallocations for 'file_idx', guess 'file_cnt'
+ (very roughly...), based on 'len'. */
+ const size_t file_cnt_guessed = 13 + len / 27720;
+ auto_vec<size_t> file_idx (file_cnt_guessed);
fprintf (out, "#include <stdint.h>\n\n");
@@ -269,9 +271,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
terminated by a NUL. */
for (size_t i = 0; i != len;)
{
+ file_idx.safe_push (i);
+
char c;
bool output_fn_ptr = false;
- file_idx[file_cnt++] = i;
fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
while ((c = input[i++]))
@@ -349,6 +352,9 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
}
}
+ const size_t file_cnt = file_idx.length ();
+ gcc_checking_assert (file_cnt == obj_count);
+
/* Create function-pointer array, required for reverse
offload function-pointer lookup. */
@@ -778,6 +784,9 @@ main (int argc, char **argv)
}
if (fopenmp)
obstack_ptr_grow (&argv_obstack, "-mgomp");
+ /* The host code may contain exception handling constructs.
+ Handle these as well as we can. */
+ obstack_ptr_grow (&argv_obstack, "-mfake-exceptions");
for (int ix = 1; ix != argc; ix++)
{
diff --git a/gcc/config/nvptx/nvptx-gen.h b/gcc/config/nvptx/nvptx-gen.h
index 893df41..f5b9899 100644
--- a/gcc/config/nvptx/nvptx-gen.h
+++ b/gcc/config/nvptx/nvptx-gen.h
@@ -26,6 +26,7 @@
#define TARGET_SM37 (ptx_isa_option >= PTX_ISA_SM37)
#define TARGET_SM52 (ptx_isa_option >= PTX_ISA_SM52)
#define TARGET_SM53 (ptx_isa_option >= PTX_ISA_SM53)
+#define TARGET_SM61 (ptx_isa_option >= PTX_ISA_SM61)
#define TARGET_SM70 (ptx_isa_option >= PTX_ISA_SM70)
#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
diff --git a/gcc/config/nvptx/nvptx-gen.opt b/gcc/config/nvptx/nvptx-gen.opt
index f45e8ef..bbae32d 100644
--- a/gcc/config/nvptx/nvptx-gen.opt
+++ b/gcc/config/nvptx/nvptx-gen.opt
@@ -39,6 +39,9 @@ EnumValue
Enum(ptx_isa) String(sm_53) Value(PTX_ISA_SM53)
EnumValue
+Enum(ptx_isa) String(sm_61) Value(PTX_ISA_SM61)
+
+EnumValue
Enum(ptx_isa) String(sm_70) Value(PTX_ISA_SM70)
EnumValue
diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
index d886701..07bcd32 100644
--- a/gcc/config/nvptx/nvptx-opts.h
+++ b/gcc/config/nvptx/nvptx-opts.h
@@ -40,6 +40,7 @@ enum ptx_version
PTX_VERSION_3_1,
PTX_VERSION_4_1,
PTX_VERSION_4_2,
+ PTX_VERSION_5_0,
PTX_VERSION_6_0,
PTX_VERSION_6_3,
PTX_VERSION_7_0,
diff --git a/gcc/config/nvptx/nvptx-sm.def b/gcc/config/nvptx/nvptx-sm.def
index 1485f89..9f9e864 100644
--- a/gcc/config/nvptx/nvptx-sm.def
+++ b/gcc/config/nvptx/nvptx-sm.def
@@ -25,6 +25,7 @@ NVPTX_SM (35, NVPTX_SM_SEP)
NVPTX_SM (37, NVPTX_SM_SEP)
NVPTX_SM (52, NVPTX_SM_SEP)
NVPTX_SM (53, NVPTX_SM_SEP)
+NVPTX_SM (61, NVPTX_SM_SEP)
NVPTX_SM (70, NVPTX_SM_SEP)
NVPTX_SM (75, NVPTX_SM_SEP)
NVPTX_SM (80, NVPTX_SM_SEP)
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 87364bf..a92a1e3 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -220,6 +220,8 @@ first_ptx_version_supporting_sm (enum ptx_isa sm)
return PTX_VERSION_4_1;
case PTX_ISA_SM53:
return PTX_VERSION_4_2;
+ case PTX_ISA_SM61:
+ return PTX_VERSION_5_0;
case PTX_ISA_SM70:
return PTX_VERSION_6_0;
case PTX_ISA_SM75:
@@ -268,6 +270,8 @@ ptx_version_to_string (enum ptx_version v)
return "4.1";
case PTX_VERSION_4_2:
return "4.2";
+ case PTX_VERSION_5_0:
+ return "5.0";
case PTX_VERSION_6_0:
return "6.0";
case PTX_VERSION_6_3:
@@ -294,6 +298,8 @@ ptx_version_to_number (enum ptx_version v, bool major_p)
return major_p ? 4 : 1;
case PTX_VERSION_4_2:
return major_p ? 4 : 2;
+ case PTX_VERSION_5_0:
+ return major_p ? 5 : 0;
case PTX_VERSION_6_0:
return major_p ? 6 : 0;
case PTX_VERSION_6_3:
@@ -2039,8 +2045,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
start_sequence ();
emit_insn (nvptx_gen_shuffle (dst_real, src_real, idx, kind));
emit_insn (nvptx_gen_shuffle (dst_imag, src_imag, idx, kind));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_SImode:
@@ -2060,8 +2065,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (nvptx_gen_pack (dst, tmp0, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_V2SImode:
@@ -2079,8 +2083,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (gen_movsi (dst0, tmp0));
emit_insn (gen_movsi (dst1, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_V2DImode:
@@ -2098,8 +2101,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (gen_movdi (dst0, tmp0));
emit_insn (gen_movdi (dst1, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_BImode:
@@ -2110,8 +2112,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (gen_sel_truesi (tmp, src, GEN_INT (1), const0_rtx));
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
emit_insn (gen_rtx_SET (dst, gen_rtx_NE (BImode, tmp, const0_rtx)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_QImode:
@@ -2124,8 +2125,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
emit_insn (gen_rtx_SET (dst, gen_rtx_fmt_e (TRUNCATE, GET_MODE (dst),
tmp)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
@@ -2188,8 +2188,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
emit_insn (nvptx_gen_shared_bcast (tmp, pm, rep, data, vector));
if (pm & PM_write)
emit_insn (gen_rtx_SET (reg, gen_rtx_NE (BImode, tmp, const0_rtx)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
@@ -2225,8 +2224,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
emit_insn (res);
emit_insn (gen_adddi3 (data->ptr, data->ptr,
GEN_INT (GET_MODE_SIZE (GET_MODE (reg)))));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
else
rep = 1;
@@ -2359,7 +2357,25 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
{
gcc_checking_assert (!init_frag.active);
/* Just use the default machinery; it's not getting used, anyway. */
- return default_assemble_integer (x, size, aligned_p);
+ bool ok = default_assemble_integer (x, size, aligned_p);
+ /* ..., but a few cases need special handling. */
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ /* The default machinery won't work: we don't define the necessary
+ operations; don't use them outside of this. */
+ gcc_checking_assert (!ok);
+ {
+ /* Just emit something; it's not getting used, anyway. */
+ const char *op = "\t.symbol_ref\t";
+ ok = (assemble_integer_with_op (op, x), true);
+ }
+ break;
+
+ default:
+ break;
+ }
+ return ok;
}
gcc_checking_assert (init_frag.active);
@@ -4579,8 +4595,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
}
emit_insn (gen_rtx_CLOBBER (GET_MODE (tmp), tmp));
emit_insn (gen_rtx_CLOBBER (GET_MODE (ptr), ptr));
- rtx cpy = get_insns ();
- end_sequence ();
+ rtx cpy = end_sequence ();
insn = emit_insn_after (cpy, insn);
}
@@ -5585,8 +5600,7 @@ workaround_uninit_method_1 (void)
if (nvptx_comment && first != NULL)
emit_insn (gen_comment ("Start: Added by -minit-regs=1"));
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init))
@@ -5642,8 +5656,7 @@ workaround_uninit_method_2 (void)
if (nvptx_comment && first != NULL)
emit_insn (gen_comment ("Start: Added by -minit-regs=2:"));
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init))
@@ -5713,8 +5726,7 @@ workaround_uninit_method_3 (void)
start_sequence ();
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL;
@@ -5745,8 +5757,7 @@ workaround_uninit_method_3 (void)
emit_insn (gen_comment ("Start: Added by -minit-regs=3:"));
emit_insn (e->insns.r);
emit_insn (gen_comment ("End: Added by -minit-regs=3:"));
- e->insns.r = get_insns ();
- end_sequence ();
+ e->insns.r = end_sequence ();
}
}
@@ -7771,6 +7782,18 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
#endif
cgraph_node *cnode = cgraph_node::get (name);
+#ifdef ACCEL_COMPILER
+ /* For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106]
+
+ For some reason (yet to be analyzed), they're not 'cnode->referred_to_p ()'.
+ (..., or that's not the right approach at all;
+ <https://inbox.sourceware.org/87v7rx8lbx.fsf@euler.schwinge.ddns.net>
+ "Re: [committed][nvptx] Use .alias directive for mptx >= 6.3"). */
+ if (DECL_CXX_CONSTRUCTOR_P (name)
+ || DECL_CXX_DESTRUCTOR_P (name))
+ ;
+ else
+#endif
if (!cnode->referred_to_p ())
/* Prevent "Internal error: reference to deleted section". */
return;
@@ -7875,8 +7898,6 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
-#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
-#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 35ef4bd..a2bb2fb 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -101,6 +101,7 @@
PTX ISA Version 3.1. */
#define TARGET_PTX_4_1 (ptx_version_option >= PTX_VERSION_4_1)
#define TARGET_PTX_4_2 (ptx_version_option >= PTX_VERSION_4_2)
+#define TARGET_PTX_5_0 (ptx_version_option >= PTX_VERSION_5_0)
#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0)
#define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
#define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 3201247..7c3bd69 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1644,7 +1644,9 @@
[(const_int 0)]
""
{
- sorry ("exception handling not supported");
+ if (!fake_exceptions)
+ sorry ("exception handling not supported");
+ DONE;
})
(define_expand "nonlocal_goto"
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 9be81ae..d326ca4 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -88,10 +88,10 @@ march-map=sm_60
Target RejectNegative Alias(misa=,sm_53)
march-map=sm_61
-Target RejectNegative Alias(misa=,sm_53)
+Target RejectNegative Alias(misa=,sm_61)
march-map=sm_62
-Target RejectNegative Alias(misa=,sm_53)
+Target RejectNegative Alias(misa=,sm_61)
march-map=sm_70
Target RejectNegative Alias(misa=,sm_70)
@@ -134,6 +134,9 @@ EnumValue
Enum(ptx_version) String(4.2) Value(PTX_VERSION_4_2)
EnumValue
+Enum(ptx_version) String(5.0) Value(PTX_VERSION_5_0)
+
+EnumValue
Enum(ptx_version) String(6.0) Value(PTX_VERSION_6_0)
EnumValue
@@ -168,6 +171,14 @@ Target Var(nvptx_alias) Init(0) Undocumented
mexperimental
Target Var(nvptx_experimental) Init(0) Undocumented
+mfake-exceptions
+Target Var(fake_exceptions) Init(0) Undocumented
+; With '-mfake-exceptions' enabled, the user-visible behavior in presence of
+; exception handling constructs changes such that the compile-time
+; 'sorry, unimplemented: exception handling not supported' is skipped, code
+; generation proceeds, and instead, exception handling constructs 'abort' at
+; run time. (..., or don't, if they're in dead code.)
+
mfake-ptx-alloca
Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented
; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only
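
Since mkoffload now always passes -mfake-exceptions to the nvptx offload compiler (see the mkoffload.cc hunk above), host code containing EH constructs can at least be compiled. A hedged example of the behavior the option comment describes:

    // Compiles for nvptx under -mfake-exceptions instead of hitting the
    // "sorry, unimplemented: exception handling not supported" diagnostic;
    // if the throw is ever reached at run time, the program aborts.
    int
    guarded_div (int a, int b)
    {
      try
        {
          if (b == 0)
            throw 0;
          return a / b;
        }
      catch (...)
        {
          return -1;
        }
    }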
diff --git a/gcc/config/or1k/or1k.cc b/gcc/config/or1k/or1k.cc
index aa486aa..868df67 100644
--- a/gcc/config/or1k/or1k.cc
+++ b/gcc/config/or1k/or1k.cc
@@ -460,8 +460,7 @@ or1k_init_pic_reg (void)
cfun->machine->set_got_insn =
emit_insn (gen_set_got_tmp (pic_offset_table_rtx));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
edge entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1409,8 +1408,9 @@ static bool
or1k_can_change_mode_class (machine_mode from, machine_mode to,
reg_class_t rclass)
{
+ /* Allow converting special flags to SImode subregs. */
if (rclass == FLAG_REGS)
- return from == to;
+ return from == to || (from == BImode && to == SImode);
return true;
}
@@ -1654,6 +1654,63 @@ or1k_rtx_costs (rtx x, machine_mode mode, int outer_code, int /* opno */,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS or1k_rtx_costs
+static bool
+or1k_is_cmov_insn (rtx_insn *seq)
+{
+ rtx_insn *curr_insn = seq;
+ rtx set = NULL_RTX;
+
+ /* The pattern may start with a simple set with register operands. Skip
+ through any of those. */
+ while (curr_insn)
+ {
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set)))
+ return false;
+
+ /* If it's not a simple reg or immediate, break. */
+ if (REG_P (SET_SRC (set)) || CONST_INT_P (SET_SRC (set)))
+ curr_insn = NEXT_INSN (curr_insn);
+ else
+ break;
+ }
+
+ if (!curr_insn)
+ return false;
+
+ /* The next instruction should be a compare. OpenRISC has many operators used
+ for comparison so skip and confirm the next is IF_THEN_ELSE. */
+ curr_insn = NEXT_INSN (curr_insn);
+ if (!curr_insn)
+ return false;
+
+ /* And the last instruction should be an IF_THEN_ELSE. */
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set))
+ || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+ return false;
+
+ return !NEXT_INSN (curr_insn);
+}
+
+/* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We detect if the conversion
+ resulted in an l.cmov instruction and, if so, consider it more profitable than
+ branch instructions. */
+
+static bool
+or1k_noce_conversion_profitable_p (rtx_insn *seq,
+ struct noce_if_info *if_info)
+{
+ if (TARGET_CMOV)
+ return or1k_is_cmov_insn (seq);
+
+ return default_noce_conversion_profitable_p (seq, if_info);
+}
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P or1k_noce_conversion_profitable_p
/* A subroutine of the atomic operation splitters. Jump to LABEL if
COND is true. Mark the jump as unlikely to be taken. */
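
The new or1k_noce_conversion_profitable_p hook above accepts if-converted sequences that end in a conditional move. Roughly the kind of source it is aimed at (illustrative only; actual code generation depends on -mcmov and the cost model):

    /* With l.cmov available, RTL if-conversion can turn this into a set-flag
       plus l.cmov sequence, which the hook now reports as profitable.  */
    int
    select_max (int a, int b)
    {
      return a > b ? a : b;
    }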
diff --git a/gcc/config/or1k/or1k.md b/gcc/config/or1k/or1k.md
index 627e400..bf71253 100644
--- a/gcc/config/or1k/or1k.md
+++ b/gcc/config/or1k/or1k.md
@@ -515,6 +515,31 @@
(ne:SI (reg:BI SR_F_REGNUM) (const_int 0)))]
"")
+;; Allowing "extending" the BImode SR_F to a general register
+;; keeps 'convert_mode_scalar' from trying to do subregging,
+;; which we don't have support for.
+;; We provide both signed and unsigned extend instructions because
+;; signed comparisons require sign extension, but for SR_F
+;; it doesn't matter which is used.
+
+(define_expand "zero_extendbisi2_sr_f"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:BI 1 "sr_f_reg_operand" "")))]
+ ""
+{
+ emit_insn (gen_sne_sr_f (operands[0]));
+ DONE;
+})
+
+(define_expand "extendbisi2_sr_f"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:BI 1 "sr_f_reg_operand" "")))]
+ ""
+{
+ emit_insn (gen_sne_sr_f (operands[0]));
+ DONE;
+})
+
(define_insn_and_split "*scc"
[(set (match_operand:SI 0 "register_operand" "=r")
(match_operator:SI 1 "equality_comparison_operator"
@@ -584,7 +609,7 @@
;; Branch instructions
;; -------------------------------------------------------------------------
-(define_expand "cbranchsi4"
+(define_insn_and_split "cbranchsi4"
[(set (pc)
(if_then_else
(match_operator 0 "comparison_operator"
@@ -593,13 +618,27 @@
(label_ref (match_operand 3 "" ""))
(pc)))]
""
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
+ rtx label;
+
+ /* Generate *scc */
or1k_expand_compare (operands);
+ /* Generate *cbranch */
+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ operands[0],
+ label,
+ pc_rtx)));
+ DONE;
})
;; Support FP branching
-(define_expand "cbranch<F:mode>4"
+(define_insn_and_split "cbranch<F:mode>4"
[(set (pc)
(if_then_else
(match_operator 0 "fp_comparison_operator"
@@ -608,8 +647,22 @@
(label_ref (match_operand 3 "" ""))
(pc)))]
"TARGET_HARD_FLOAT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
+ rtx label;
+
+ /* Generate *scc */
or1k_expand_compare (operands);
+ /* Generate *cbranch */
+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ operands[0],
+ label,
+ pc_rtx)));
+ DONE;
})
(define_insn "*cbranch"
diff --git a/gcc/config/or1k/or1k.opt b/gcc/config/or1k/or1k.opt
index 00c5560..d252de0 100644
--- a/gcc/config/or1k/or1k.opt
+++ b/gcc/config/or1k/or1k.opt
@@ -69,8 +69,8 @@ are used to perform unordered floating point compare and set flag operations.
mcmodel=
Target RejectNegative Joined Enum(or1k_cmodel_type) Var(or1k_code_model) Init(CMODEL_SMALL)
Specify the code model used for accessing memory addresses. Specifying large
-enables generating binaries with large global offset tables. By default the
-value is small.
+enables generating binaries with large global offset tables and calling
+functions anywhere in an executable. By default the value is small.
Enum
Name(or1k_cmodel_type) Type(enum or1k_cmodel_type)
diff --git a/gcc/config/or1k/predicates.md b/gcc/config/or1k/predicates.md
index 11bb518..7ccfd09 100644
--- a/gcc/config/or1k/predicates.md
+++ b/gcc/config/or1k/predicates.md
@@ -60,8 +60,13 @@
(and (match_operand 0 "register_operand")
(match_test "TARGET_ROR"))))
+(define_predicate "sr_f_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "REGNO (op) == SR_F_REGNUM")))
+
(define_predicate "call_insn_operand"
- (ior (match_code "symbol_ref")
+ (ior (and (match_code "symbol_ref")
+ (match_test "!TARGET_CMODEL_LARGE"))
(match_operand 0 "register_operand")))
(define_predicate "high_operand"
diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h
index 74e30ed..1439447 100644
--- a/gcc/config/pa/pa-hpux.h
+++ b/gcc/config/pa/pa-hpux.h
@@ -114,3 +114,17 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
+
+/* Assume we have libatomic if sync libcalls are disabled. */
+#undef TARGET_HAVE_LIBATOMIC
+#define TARGET_HAVE_LIBATOMIC (!flag_sync_libcalls)
+
+/* The SYNC operations are implemented as library functions, not
+ INSN patterns. As a result, the HAVE defines for the patterns are
+ not defined. We need to define them to generate the corresponding
+ __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
+ defines. */
+#define HAVE_sync_compare_and_swapqi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swaphi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swapsi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swapdi (flag_sync_libcalls)
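
The HAVE_sync_compare_and_swap* defines above exist only so the usual feature macros get defined even though the operations are libcalls. A small check of the intended effect (hedged; names follow the generic __sync API):

    /* On hppa*-hpux with sync libcalls enabled, this should now be compiled
       in, with the __sync call resolved by the runtime library rather than
       by an inline instruction sequence.  */
    #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
    int
    cas_int (int *p, int oldval, int newval)
    {
      return __sync_bool_compare_and_swap (p, oldval, newval);
    }
    #endif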
diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 9542d3b..b63ccf1 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -1123,8 +1123,7 @@ legitimize_tls_address (rtx addr)
else
emit_insn (gen_tld_load (tmp, addr));
t1 = hppa_tls_call (tmp);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
t2 = gen_reg_rtx (Pmode);
emit_libcall_block (insn, t2, t1,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 6319108..47e5f24 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -1040,8 +1040,7 @@ pru_expand_fp_compare (rtx comparison, machine_mode mode)
cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
op0, op_mode, op1, op_mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, SImode, op0, op1));
@@ -2919,8 +2918,7 @@ pru_reorg_loop (rtx_insn *insns)
LABEL_NUSES (end->label)++;
/* Emit the whole sequence before the doloop_end. */
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_insn_before (insn, end->insn);
/* Delete the doloop_end. */
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc..e829313 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,122 @@
DONE;
}
[(set_attr "type" "vandn")])
+
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; =============================================================================
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_vdup_v:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2],
+ operands[1], <CODE>,
+ <MODE>mode);
+ }
+ [(set_attr "type" "vialu")])
+
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_v_vdup:V_VLSI
+ (match_operand:V_VLSI 1 "<binop_rhs2_predicate>")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1],
+ operands[2], <CODE>,
+ <MODE>mode);
+ }
+ [(set_attr "type" "vialu")])
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vf
+;; Include
+;; - vfmadd.vf
+;; - vfmsub.vf
+;; - vfnmadd.vf
+;; - vfnmsub.vf
+;; =============================================================================
+
+;; vfmadd.vf, vfmsub.vf
+(define_insn_and_split "*<optab>_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd")
+ (plus_minus:V_VLSF
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand" " f"))
+ (match_operand:V_VLSF 2 "register_operand" " 0"))
+ (match_operand:V_VLSF 3 "register_operand" " vr")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ operands[2]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
+
+;; vfnmsub.vf
+(define_insn_and_split "*vfnmsub_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd")
+ (minus:V_VLSF
+ (match_operand:V_VLSF 3 "register_operand" " vr")
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand" " f"))
+ (match_operand:V_VLSF 2 "register_operand" " 0"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ operands[2]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (PLUS, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
+
+;; vfnmadd.vf
+(define_insn_and_split "*vfnmadd_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd")
+ (minus:V_VLSF
+ (mult:V_VLSF
+ (neg:V_VLSF
+ (match_operand:V_VLSF 2 "register_operand" " 0"))
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand" " f")))
+ (match_operand:V_VLSF 3 "register_operand" " vr")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ operands[2]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (MINUS, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
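
The combine patterns added above target vectorized loops in which one operand is a broadcast scalar. Two illustrative loops (assuming an RVV-enabled -march and, for the second, FMA contraction; names are made up):

    /* Candidate for vadd.vx: the scalar x would otherwise be broadcast with
       vmv.v.x and added with vadd.vv.  */
    void
    add_scalar (int *__restrict a, const int *__restrict b, int x, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + x;
    }

    /* Candidate for vfmadd.vf / vfmacc.vf once the multiply-add is
       contracted.  */
    void
    fma_scalar (float *__restrict a, const float *__restrict b, float f, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = a[i] * f + b[i];
    }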
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 9e51e3c..94a61bd 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1338,7 +1338,7 @@
(define_expand "select_vl<mode>"
[(match_operand:P 0 "register_operand")
(match_operand:P 1 "vector_length_operand")
- (match_operand:P 2 "")]
+ (match_operand:P 2 "immediate_operand")]
"TARGET_VECTOR"
{
riscv_vector::expand_select_vl (operands);
@@ -2491,19 +2491,13 @@
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))]
"TARGET_VECTOR"
-{
- /* First emit a widening addition. */
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
-
- /* Then a narrowing shift. */
- rtx ops2[] = {operands[0], tmp1, const1_rtx};
- icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
- DONE;
-})
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN,
+ operands);
+ DONE;
+ }
+)
(define_expand "avg<v_double_trunc>3_ceil"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -2517,25 +2511,13 @@
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
(const_int 1)))))]
"TARGET_VECTOR"
-{
- /* First emit a widening addition. */
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
-
- /* Then add 1. */
- rtx tmp2 = gen_reg_rtx (<MODE>mode);
- rtx ops2[] = {tmp2, tmp1, const1_rtx};
- icode = code_for_pred_scalar (PLUS, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
-
- /* Finally, a narrowing shift. */
- rtx ops3[] = {operands[0], tmp2, const1_rtx};
- icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
- DONE;
-})
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU,
+ operands);
+ DONE;
+ }
+)
;; csrwi vxrm, 2
;; vaaddu.vv vd, vs2, vs1
@@ -2546,7 +2528,8 @@
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN,
+ operands);
DONE;
})
@@ -2559,7 +2542,8 @@
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU,
+ operands);
DONE;
})
@@ -2584,7 +2568,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2594,7 +2579,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2604,7 +2590,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2614,7 +2601,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2624,7 +2612,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2634,7 +2623,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2644,7 +2634,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2701,7 +2692,8 @@
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
+ riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode,
+ <V_F2SI_CONVERT>mode);
DONE;
}
)
@@ -2711,7 +2703,8 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
+ riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode,
+ <V_F2DI_CONVERT>mode);
DONE;
}
)
@@ -2721,7 +2714,8 @@
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
+ riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode,
+ <V_F2SI_CONVERT>mode);
DONE;
}
)
@@ -2731,7 +2725,8 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
+ riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode,
+ <V_F2DI_CONVERT>mode);
DONE;
}
)
@@ -2763,7 +2758,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2774,7 +2770,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2785,7 +2782,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2796,7 +2794,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
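
The avg<mode>3_floor/avg<mode>3_ceil rewrite above replaces the widen-add plus narrowing-shift expansion with a single vaadd.vv under the RDN or RNU rounding mode. The kind of loop those expanders serve (illustrative; relies on the vectorizer recognizing the average pattern):

    #include <stdint.h>

    /* Recognized as a signed floor-average; with this patch it should expand
       to vaadd.vv with the vxrm rounding mode set to round-down.  */
    void
    avg_floor (int8_t *__restrict d, const int8_t *__restrict a,
               const int8_t *__restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        d[i] = ((int16_t) a[i] + b[i]) >> 1;
    }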
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 5ed5e18..21426f4 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1,4 +1,4 @@
-;; Machine description for RISC-V Bit Manipulation operations.
+;; Machine description for RISC-V Bit Manipulation operations.
;; Copyright (C) 2021-2025 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -68,23 +68,25 @@
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (ashift:SI (subreg:SI (match_operand:DI 1 "register_operand") 0)
(match_operand:QI 2 "imm123_operand"))
- (subreg:SI (match_operand:DI 3 "register_operand") 0))))]
+ (subreg:SI (match_operand:DI 3 "register_operand") 0))))
+ (clobber (match_operand:DI 4 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA"
- [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
- (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+ [(set (match_dup 4) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 4) 0)))])
(define_split
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (subreg:SI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "imm123_operand"))
(match_operand:DI 3 "consecutive_bits_operand")) 0)
- (subreg:SI (match_operand:DI 4 "register_operand") 0))))]
+ (subreg:SI (match_operand:DI 4 "register_operand") 0))))
+ (clobber (match_operand:DI 5 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA
&& riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
/* Ensure the mask includes all the bits in SImode. */
&& ((INTVAL (operands[3]) & (HOST_WIDE_INT_1U << 31)) != 0)"
- [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4)))
- (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+ [(set (match_dup 5) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4)))
+ (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 5) 0)))])
; Make sure that an andi followed by a sh[123]add remains a two instruction
; sequence--and is not torn apart into slli, slri, add.
@@ -195,13 +197,14 @@
(match_operand:QI 2 "imm123_operand"))
(match_operand 3 "consecutive_bits32_operand"))
(match_operand:DI 4 "register_operand"))
- (match_operand 5 "immediate_operand")))]
+ (match_operand 5 "immediate_operand")))
+ (clobber (match_operand:DI 6 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA"
- [(set (match_dup 0)
+ [(set (match_dup 6)
(plus:DI (and:DI (ashift:DI (match_dup 1) (match_dup 2))
(match_dup 3))
(match_dup 4)))
- (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 5)))])
+ (set (match_dup 0) (plus:DI (match_dup 6) (match_dup 5)))])
;; ZBB extension.
@@ -423,39 +426,40 @@
"rolw\t%0,%1,%2"
[(set_attr "type" "bitmanip")])
-(define_insn_and_split "*<bitmanip_optab><GPR:mode>3_mask"
- [(set (match_operand:GPR 0 "register_operand" "= r")
- (bitmanip_rotate:GPR
- (match_operand:GPR 1 "register_operand" " r")
- (match_operator 4 "subreg_lowpart_operator"
- [(and:GPR2
- (match_operand:GPR2 2 "register_operand" "r")
- (match_operand 3 "<GPR:shiftm1>" "<GPR:shiftm1p>"))])))]
+(define_insn "*<bitmanip_optab><mode>3_mask"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (bitmanip_rotate:X
+ (match_operand:X 1 "register_operand" "r")
+ (match_operator 4 "subreg_lowpart_operator"
+ [(and:X (match_operand:X 2 "register_operand" "r")
+ (match_operand 3 "<X:shiftm1>" "<X:shiftm1p>"))])))]
"TARGET_ZBB || TARGET_ZBKB"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (bitmanip_rotate:GPR (match_dup 1)
- (match_dup 2)))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<bitmanip_insn>\t%0,%1,%2"
[(set_attr "type" "bitmanip")
- (set_attr "mode" "<GPR:MODE>")])
+ (set_attr "mode" "<X:MODE>")])
-(define_insn_and_split "*<bitmanip_optab>si3_sext_mask"
- [(set (match_operand:DI 0 "register_operand" "= r")
- (sign_extend:DI (bitmanip_rotate:SI
- (match_operand:SI 1 "register_operand" " r")
- (match_operator 4 "subreg_lowpart_operator"
- [(and:GPR
- (match_operand:GPR 2 "register_operand" "r")
- (match_operand 3 "const_si_mask_operand"))]))))]
+(define_insn "*<bitmanip_optab>3_mask_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bitmanip_rotate:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operator 3 "subreg_lowpart_operator"
+ [(and:X (match_operand:SI 2 "register_operand" "r")
+ (const_int 31))])))]
"TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (sign_extend:DI (bitmanip_rotate:SI (match_dup 1)
- (match_dup 2))))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<bitmanip_insn>w\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<bitmanip_optab>si3_sext_mask"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (bitmanip_rotate:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operator 3 "subreg_lowpart_operator"
+ [(and:X (match_operand:GPR 2 "register_operand" "r")
+ (const_int 31))]))))]
+ "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)"
+ "<bitmanip_insn>w\t%0,%1,%2"
[(set_attr "type" "bitmanip")
(set_attr "mode" "DI")])
@@ -842,44 +846,40 @@
[(set_attr "type" "bitmanip")])
;; In case we have "val & ~IMM" where ~IMM has 2 bits set.
-(define_insn_and_split "*bclri<mode>_nottwobits"
- [(set (match_operand:X 0 "register_operand" "=r")
- (and:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))]
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_nottwobits_not_arith_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS && !paradoxical_subreg_p (operands[1])"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
{
- unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
- unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+ unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (~bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (~bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; In case of a paradoxical subreg, the sign bit and the high bits are
;; not allowed to be changed
-(define_insn_and_split "*bclridisi_nottwobits"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))]
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "const_nottwobits_not_arith_operand")))
+ (clobber (match_operand:DI 3 "register_operand"))]
"TARGET_64BIT && TARGET_ZBS
&& clz_hwi (~UINTVAL (operands[2])) > 33"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))]
{
- unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
- unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+ unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (~bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (~bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; An outer AND with a constant where bits 31..63 are 0 can be seen as
;; a virtual zero extension from 31 to 64 bits.
@@ -908,6 +908,24 @@
"bext\t%0,%1,%2"
[(set_attr "type" "bitmanip")])
+;; We do not define SHIFT_COUNT_TRUNCATED, so we have to have variants
+;; that mask/extend the count if we want to eliminate those ops
+;;
+;; We could (in theory) use GPR for the various modes, but I haven't
+;; seen those cases appear in practice. Without a testcase I've
+;; elected to keep the mode X, which is easy to reason about.
+(define_insn "*bext<mode>_mask_pos"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X
+ (match_operand:X 2 "register_operand" "r")
+ (match_operand 3 "const_int_operand"))))]
+ "(TARGET_ZBS
+ && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
;; This is a bext followed by a seqz. Normally this would be a 3->2 split
;; But the and-not pattern with a constant operand is a define_insn_and_split,
;; so this looks like a 2->2 split, which combine rejects. So implement it
@@ -992,12 +1010,13 @@
[(set (match_operand:X 0 "register_operand")
(and:X (not:X (lshiftrt:X (match_operand:X 1 "register_operand")
(match_operand:QI 2 "register_operand")))
- (const_int 1)))]
+ (const_int 1)))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS"
- [(set (match_dup 0) (zero_extract:X (match_dup 1)
+ [(set (match_dup 3) (zero_extract:X (match_dup 1)
(const_int 1)
(match_dup 2)))
- (set (match_dup 0) (xor:X (match_dup 0) (const_int 1)))]
+ (set (match_dup 0) (xor:X (match_dup 3) (const_int 1)))]
"operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
;; We can create a polarity-reversed mask (i.e. bit N -> { set = 0, clear = -1 })
@@ -1008,49 +1027,49 @@
(neg:GPR (eq:GPR (zero_extract:GPR (match_operand:GPR 1 "register_operand")
(const_int 1)
(match_operand 2))
- (const_int 0))))]
+ (const_int 0))))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS"
- [(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
- (set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))])
+ [(set (match_dup 3) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
+ (set (match_dup 0) (plus:GPR (match_dup 3) (const_int -1)))])
;; Catch those cases where we can use a bseti/binvi + ori/xori or
;; bseti/binvi + bseti/binvi instead of a lui + addi + or/xor sequence.
(define_insn_and_split "*<or_optab>i<mode>_extrabit"
[(set (match_operand:X 0 "register_operand" "=r")
(any_or:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))]
+ (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))
+ (clobber (match_scratch:X 3 "=&r"))]
"TARGET_ZBS && !single_bit_mask_operand (operands[2], VOIDmode)"
"#"
"&& reload_completed"
- [(set (match_dup 0) (<or_optab>:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (<or_optab>:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (<or_optab>:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (<or_optab>:X (match_dup 3) (match_dup 5)))]
{
unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (bits &~ topbit);
- operands[4] = GEN_INT (topbit);
+ operands[4] = GEN_INT (bits &~ topbit);
+ operands[5] = GEN_INT (topbit);
}
[(set_attr "type" "bitmanip")])
;; Same to use blcri + andi and blcri + bclri
-(define_insn_and_split "*andi<mode>_extrabit"
- [(set (match_operand:X 0 "register_operand" "=r")
- (and:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))]
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits")))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
{
unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (~bits);
- operands[3] = GEN_INT (bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; If we have the ZBA extension, then we can clear the upper half of a 64
;; bit object with a zext.w. So if we have AND where the constant would
@@ -1203,7 +1222,7 @@
we can't keep it in 64 bit variable.)
then use clmul instruction to implement the CRC,
otherwise (TARGET_ZBKB) generate table based using brev. */
- if ((TARGET_ZBKC || TARGET_ZBC) && <ANYI:MODE>mode < word_mode)
+ if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < word_mode)
expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode,
operands);
else if (TARGET_ZBKB)
@@ -1235,7 +1254,8 @@
(match_operand:SUBX 3)]
UNSPEC_CRC))]
/* We don't support the case when data's size is bigger than CRC's size. */
- "(TARGET_ZBKC || TARGET_ZBC) && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
+ "(TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC)
+ && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
{
/* If we have the ZBC or ZBKC extension (ie, clmul) and
it is possible to store the quotient within a single variable
@@ -1245,3 +1265,89 @@
expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
DONE;
})
+
+;; If we have an XOR/IOR with a constant operand (C) and we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way. That's future work.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (any_or:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZBB
+ && (riscv_const_insns (operands[2], true)
+ > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+ [(const_int 0)]
+{
+ /* Get the inverted constant into the temporary register. */
+ riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+ /* For xnor, the NOT operation is in a different position. So
+ we have to customize the split code we generate a bit.
+
+ It is expected that AND will be handled like IOR in the future. */
+ if (<CODE> == XOR)
+ {
+ rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+ x = gen_rtx_NOT (<X:MODE>mode, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+ x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ DONE;
+})
+
+;; More forms of single bit extraction. The RISC-V port does not
+;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position
+;; is masked.
+;;
+;; We could in theory use this for rv32 as well, but it probably does
+;; not occur in practice. The bit position would need to be QI/HI mode,
+;; otherwise we would not need the zero extension.
+;;
+;; One could also argue that the zero extension is redundant and should
+;; have been optimized away during RTL simplification.
+(define_insn "*bextdi_position_ze_masked"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:DI
+ (and:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 63)))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; Same as above, but without the extraneous zero_extend.
+(define_insn "*bextdi_position_ze_masked"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X
+ (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "bitpos_mask_operand" "n"))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; This has shown up in testing. In particular we end up with an
+;; immediate input. We can load that into a register and target
+;; one of the above bext patterns.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (lshiftrt:X (match_operand 1 "immediate_operand")
+ (match_operand:QI 2 "register_operand"))
+ (const_int 1)))
+ (clobber (match_operand:X 3 "register_operand"))]
+ ""
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 0) (zero_extract:X (match_dup 3)
+ (const_int 1)
+ (zero_extend:X (match_dup 2))))])
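
For orientation, the new bext position-masking patterns and the IOR/XOR
constant-inversion split above key off ordinary source-level idioms rather
than new builtins.  A minimal C sketch of those idioms follows; it is an
illustration only (not part of the patch), and whether a given build emits
bext or orn for it depends on -march and on what combine manages to match.

#include <stdint.h>

/* Single-bit test with an explicitly masked position.  With Zbs the
   masked form can collapse to a single bext, because the mask proves
   the shift count is in range.  */
uint64_t bit_at (uint64_t x, uint64_t pos)
{
  return (x >> (pos & 63)) & 1;
}

/* IOR with a constant whose complement is cheaper to materialize.  Here
   ~C is a single LUI value (0x12345000), so loading ~C and combining with
   orn can be shorter than synthesizing C and using a plain or.  */
uint64_t or_const (uint64_t x)
{
  return x | 0xffffffffedcbafffULL;
}
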
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index ba3c6e6..ccab1a2 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -43,6 +43,10 @@
(define_register_constraint "cf" "TARGET_HARD_FLOAT ? RVC_FP_REGS : (TARGET_ZFINX ? RVC_GR_REGS : NO_REGS)"
"RVC floating-point registers (f8-f15), if available, reuse GPR as FPR when use zfinx.")
+(define_register_constraint "cR" "RVC_GR_REGS"
+ "Even-odd RVC general purpose register (x8-x15)."
+ "regno % 2 == 0")
+
;; General constraints
(define_constraint "I"
@@ -311,3 +315,17 @@
"Shifting immediate for SIMD shufflei3."
(and (match_code "const_int")
(match_test "IN_RANGE (ival, -64, -1)")))
+
+(define_constraint "Ou01"
+ "A 1-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 1)")))
+
+(define_constraint "Ou02"
+ "A 2-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 3)")))
+
+(define_constraint "Q"
+ "An address operand that is valid for a prefetch instruction"
+ (match_operand 0 "prefetch_operand"))
diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc
new file mode 100644
index 0000000..17b8f5b
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-ext-opt.cc
@@ -0,0 +1,105 @@
+#include <vector>
+#include <string>
+#include <set>
+#include <stdio.h>
+#include "riscv-opts.h"
+
+struct version_t
+{
+ int major;
+ int minor;
+ version_t (int major, int minor,
+ enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
+ : major (major), minor (minor)
+ {}
+ bool operator<(const version_t &other) const
+ {
+ if (major != other.major)
+ return major < other.major;
+ return minor < other.minor;
+ }
+
+ bool operator== (const version_t &other) const
+ {
+ return major == other.major && minor == other.minor;
+ }
+};
+
+static void
+print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
+ const std::string &desc,
+ const std::vector<version_t> &supported_versions)
+{
+ // Implementation of the function to print the documentation entry
+ // for the extension.
+ std::set<version_t> unique_versions;
+ for (const auto &version : supported_versions)
+ unique_versions.insert (version);
+ printf ("@item %s\n", ext_name.c_str ());
+ printf ("@tab");
+ for (const auto &version : unique_versions)
+ {
+ printf (" %d.%d", version.major, version.minor);
+ }
+ printf ("\n");
+ printf ("@tab %s", full_name.c_str ());
+ if (desc.size ())
+ printf (", %s", desc.c_str ());
+ printf ("\n\n");
+}
+
+int
+main ()
+{
+ puts ("; Target options for the RISC-V port of the compiler");
+ puts (";");
+ puts ("; Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts (";");
+ puts ("; This file is part of GCC.");
+ puts (";");
+ puts (
+ "; GCC is free software; you can redistribute it and/or modify it under");
+ puts (
+ "; the terms of the GNU General Public License as published by the Free");
+ puts (
+ "; Software Foundation; either version 3, or (at your option) any later");
+ puts ("; version.");
+ puts (";");
+ puts ("; GCC is distributed in the hope that it will be useful, but WITHOUT");
+ puts ("; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY");
+ puts ("; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public");
+ puts ("; License for more details.");
+ puts (";");
+ puts ("; You should have received a copy of the GNU General Public License");
+ puts ("; along with GCC; see the file COPYING3. If not see ");
+ puts ("; <http://www.gnu.org/licenses/>.");
+
+ puts ("; This file is generated automatically using");
+ puts ("; gcc/config/riscv/gen-riscv-ext-opt.cc from:");
+ puts ("; gcc/config/riscv/riscv-ext.def");
+ puts ("");
+ puts ("; Please *DO NOT* edit manually.");
+
+ std::set<std::string> all_vars;
+#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ all_vars.insert ("riscv_" #FLAG_GROUP "_subext");
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ for (auto var : all_vars)
+ {
+ puts ("TargetVariable");
+ printf ("int %s\n\n", var.c_str ());
+ }
+
+#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ puts ("Mask(" #UPPERCAE_NAME ") Var(riscv_" #FLAG_GROUP "_subext)\n");
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-ext-texi.cc b/gcc/config/riscv/gen-riscv-ext-texi.cc
new file mode 100644
index 0000000..c29a375
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-ext-texi.cc
@@ -0,0 +1,88 @@
+#include <vector>
+#include <string>
+#include <set>
+#include <stdio.h>
+#include "riscv-opts.h"
+
+struct version_t
+{
+ int major_version;
+ int minor_version;
+ version_t (int major, int minor,
+ enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
+ : major_version (major), minor_version (minor)
+ {}
+ bool operator<(const version_t &other) const
+ {
+ if (major_version != other.major_version)
+ return major_version < other.major_version;
+ return minor_version < other.minor_version;
+ }
+
+ bool operator== (const version_t &other) const
+ {
+ return major_version == other.major_version && minor_version == other.minor_version;
+ }
+};
+
+static void
+print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
+ const std::string &desc,
+ const std::vector<version_t> &supported_versions)
+{
+ // Implementation of the function to print the documentation entry
+ // for the extension.
+ std::set<version_t> unique_versions;
+ for (const auto &version : supported_versions)
+ unique_versions.insert (version);
+ printf ("@item %s\n", ext_name.c_str ());
+ printf ("@tab");
+ for (const auto &version : unique_versions)
+ {
+ printf (" %d.%d", version.major_version, version.minor_version);
+ }
+ printf ("\n");
+ printf ("@tab %s", full_name.c_str ());
+ if (desc.size ())
+ printf (", %s", desc.c_str ());
+ printf ("\n\n");
+}
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-ext-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-ext.def");
+ puts ("@c gcc/config/riscv/riscv-opts.h");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@multitable @columnfractions .10 .10 .80");
+ puts ("@headitem Extension Name @tab Supported Version @tab Description");
+ puts ("");
+
+  /* The g extension is a very special extension that has no clear version... */
+ puts ("@item g");
+ puts ("@tab -");
+ puts (
+ "@tab General-purpose computing base extension, @samp{g} will expand to");
+ puts ("@samp{i}, @samp{m}, @samp{a}, @samp{f}, @samp{d}, @samp{zicsr} and");
+ puts ("@samp{zifencei}.");
+ puts ("");
+
+#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ print_ext_doc_entry (#NAME, FULL_NAME, DESC, \
+ std::vector<version_t> SUPPORTED_VERSIONS);
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ puts ("@end multitable");
+ return 0;
+}
diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md
index cb71941..ab9e57f 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -141,3 +141,7 @@
(eq_attr "type" "rdvlenb,rdvl")
"vxu_ooo_issue,vxu_ooo_issue")
+;; Vector sf_vcp.
+(define_insn_reservation "vec_sf_vcp" 2
+ (eq_attr "type" "sf_vc,sf_vc_se")
+ "vxu_ooo_issue")
diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc
index 6de23cb6..f296089 100644
--- a/gcc/config/riscv/genrvv-type-indexer.cc
+++ b/gcc/config/riscv/genrvv-type-indexer.cc
@@ -23,8 +23,14 @@ along with GCC; see the file COPYING3. If not see
#include <assert.h>
#include <math.h>
-#define BOOL_SIZE_LIST {1, 2, 4, 8, 16, 32, 64}
-#define EEW_SIZE_LIST {8, 16, 32, 64}
+#define BOOL_SIZE_LIST \
+ { \
+ 1, 2, 4, 8, 16, 32, 64 \
+ }
+#define EEW_SIZE_LIST \
+ { \
+ 8, 16, 32, 64 \
+ }
#define LMUL1_LOG2 0
std::string
@@ -167,7 +173,7 @@ floattype (unsigned sew, int lmul_log2)
std::string
expand_floattype (unsigned sew, int lmul_log2, unsigned nf)
{
- if (sew != 8 || nf!= 1
+ if (sew != 8 || nf != 1
|| (!valid_type (sew * 4, lmul_log2 + 2, /*float_t*/ true)))
return "INVALID";
@@ -297,11 +303,13 @@ main (int argc, const char **argv)
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*SIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew,
- inttype (eew, LMUL1_LOG2, /* unsigned_p */false).c_str ());
+ inttype (eew, LMUL1_LOG2, /* unsigned_p */ false).c_str ());
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew,
- inttype (eew, LMUL1_LOG2, /* unsigned_p */true).c_str ());
+ inttype (eew, LMUL1_LOG2, /* unsigned_p */ true).c_str ());
+
+ fprintf (fp, " /*X2*/ INVALID,\n");
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
@@ -426,6 +434,10 @@ main (int argc, const char **argv)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n",
eew);
+ fprintf (
+ fp, " /*X2*/ %s,\n",
+ inttype (sew * 2, lmul_log2 + 1, /*unsigned_p*/ true).c_str ());
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
@@ -501,6 +513,8 @@ main (int argc, const char **argv)
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n", eew);
+ fprintf (fp, " /*X2*/ INVALID,\n");
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
@@ -588,6 +602,8 @@ main (int argc, const char **argv)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n",
eew);
+ fprintf (fp, " /*X2*/ INVALID,\n");
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h
new file mode 100644
index 0000000..047399b
--- /dev/null
+++ b/gcc/config/riscv/gnu.h
@@ -0,0 +1,59 @@
+/* Definitions for RISC-V GNU/Hurd systems with ELF format.
+ Copyright (C) 1998-2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ } while (0)
+
+#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1"
+
+#define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+
+#define CPP_SPEC "%{pthread:-D_REENTRANT}"
+
+#define LD_EMUL_SUFFIX \
+ "%{mabi=lp64d:}" \
+ "%{mabi=lp64f:_lp64f}" \
+ "%{mabi=lp64:_lp64}" \
+ "%{mabi=ilp32d:}" \
+ "%{mabi=ilp32f:_ilp32f}" \
+ "%{mabi=ilp32:_ilp32}"
+
+#define LINK_SPEC "\
+-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \
+%{mno-relax:--no-relax} \
+-X \
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
+%{shared} \
+ %{!shared: \
+ %{!static: \
+ %{!static-pie: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \
+ %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}"
+
+#define STARTFILE_PREFIX_SPEC \
+ "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/lib/ " \
+ "/usr/lib/ "
+
+#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi
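
The link and startfile specs in this new gnu.h are built by string-literal
concatenation with XLEN_SPEC and ABI_SPEC.  A hedged worked example of the
dynamic linker name (the "64" and "lp64d" expansions are assumptions about
the usual rv64/lp64d configuration, not something this header states):

/* Standalone sketch of the concatenation only.  */
#define XLEN_SPEC "64"
#define ABI_SPEC "lp64d"
#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1"
/* GNU_USER_DYNAMIC_LINKER expands to "/lib/ld-riscv64-lp64d.so.1".  */
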
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 214c20b..584b345 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -262,6 +262,9 @@
(define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")])
+(define_code_attr OPTAB [(ior "IOR")
+ (xor "XOR")])
+
;; -------------------------------------------------------------------
;; Code Attributes
diff --git a/gcc/config/riscv/mips-p8700.md b/gcc/config/riscv/mips-p8700.md
new file mode 100644
index 0000000..ae0ea8d
--- /dev/null
+++ b/gcc/config/riscv/mips-p8700.md
@@ -0,0 +1,167 @@
+;; DFA-based pipeline description for MIPS P8700.
+;;
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "mips_p8700_agen_alq_pipe, mips_p8700_mdu_pipe, mips_p8700_fpu_pipe")
+
+;; The address generation queue (AGQ) has AL2, CTISTD and LDSTA pipes
+(define_cpu_unit "mips_p8700_agq, mips_p8700_al2, mips_p8700_ctistd, mips_p8700_lsu"
+ "mips_p8700_agen_alq_pipe")
+
+(define_cpu_unit "mips_p8700_gpmul, mips_p8700_gpdiv" "mips_p8700_mdu_pipe")
+
+;; The arithmetic-logic-unit queue (ALQ) has ALU pipe
+(define_cpu_unit "mips_p8700_alq, mips_p8700_alu" "mips_p8700_agen_alq_pipe")
+
+;; The floating-point-unit queue (FPQ) has short and long pipes
+(define_cpu_unit "mips_p8700_fpu_short, mips_p8700_fpu_long" "mips_p8700_fpu_pipe")
+
+;; Long FPU pipeline.
+(define_cpu_unit "mips_p8700_fpu_apu" "mips_p8700_fpu_pipe")
+
+;; P8700 unsupported insns are mapped to dummy reservations
+(define_reservation "mips_p8700_dummies"
+ "mips_p8700_agq | mips_p8700_al2 | mips_p8700_ctistd | mips_p8700_lsu |
+ mips_p8700_fpu_short | mips_p8700_fpu_long")
+
+(define_reservation "mips_p8700_agq_al2" "mips_p8700_agq, mips_p8700_al2")
+(define_reservation "mips_p8700_agq_ctistd" "mips_p8700_agq, mips_p8700_ctistd")
+(define_reservation "mips_p8700_agq_lsu" "mips_p8700_agq, mips_p8700_lsu")
+(define_reservation "mips_p8700_alq_alu" "mips_p8700_alq, mips_p8700_alu")
+
+;;
+;; FPU pipe
+;;
+
+(define_insn_reservation "mips_p8700_fpu_fadd" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fadd"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fabs" 2
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fcmp,fmove"))
+ "mips_p8700_fpu_short, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fload" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fpload"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fstore" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fpstore"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fmadd" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fmadd"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fmul" 5
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fmul"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_div" 17
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fdiv,fsqrt"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu*17")
+
+(define_insn_reservation "mips_p8700_fpu_fcvt" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fmtc" 7
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "mtc"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fmfc" 7
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "mfc"))
+ "mips_p8700_agq_lsu")
+
+;;
+;; Integer pipe
+;;
+
+(define_insn_reservation "mips_p8700_int_load" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "load"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_int_store" 3
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "store"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_int_arith_1" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,logical,move,bitmanip,min,max,minu,maxu,clz,ctz,rotate,atomic,condmove,crypto,mvpair,zicond"))
+ "mips_p8700_alq_alu | mips_p8700_agq_al2")
+
+(define_insn_reservation "mips_p8700_int_nop" 0
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "nop"))
+ "mips_p8700_alq_alu | mips_p8700_agq_al2")
+
+(define_insn_reservation "mips_p8700_dsp_mult" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "imul,cpop,clmul"))
+ "mips_p8700_gpmul")
+
+(define_insn_reservation "mips_p8700_int_div" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "idiv"))
+ "mips_p8700_gpdiv*5")
+
+(define_insn_reservation "mips_p8700_int_branch" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "branch,jump,ret,sfb_alu,trap"))
+ "mips_p8700_agq_ctistd")
+
+(define_insn_reservation "mips_p8700_int_call" 2
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "call,jalr"))
+ "mips_p8700_agq_ctistd")
+
+;; mips-p8700 dummy reservation for insns and placeholders that have no mapping to p8700 hardware.
+(define_insn_reservation "mips_p8700_unknown" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "rdvlenb,rdvl,wrvxrm,wrfrm,
+ rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,
+ vldux,vldox,vstux,vstox,vldff,vldr,vstr,
+ vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,
+ vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,
+ vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,
+ vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,
+ vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,
+ vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,
+ vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,
+ vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,
+ vired,viwred,vfredu,vfredo,vfwredu,vfwredo,
+ vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,
+ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
+ vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
+ vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
+ vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+ sf_vc,sf_vc_se"))
+ "mips_p8700_dummies")
diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator
index 4828016..6ad1cf0 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -159,8 +159,8 @@ for cmodel in cmodels:
"e.g. rv32imafd-ilp32--" % cfg)
sys.exit(1)
- # Compact code model only support rv64.
- if cmodel == "compact" and arch.startswith("rv32"):
+ # Large code model only support rv64.
+ if cmodel == "large" and arch.startswith("rv32"):
continue
arch = arch_canonicalize (arch, args.misa_spec)
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f26bafc..8072d67 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,6 +27,18 @@
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
+;; REG or REG+D where D fits in a simm12 and has the low 4 bits
+;; clear.  The REG+D form can be reloaded into a temporary if
+;; FP elimination exposes an invalid offset.
+(define_predicate "prefetch_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_test "const_arith_operand (op, VOIDmode)")
+ (match_test "(INTVAL (op) & 0xf) == 0"))
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), word_mode)")
+ (match_test "const_arith_operand (XEXP (op, 1), VOIDmode)")
+ (match_test "(INTVAL (XEXP (op, 1)) & 0xf) == 0"))))
+
(define_predicate "lui_operand"
(and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
@@ -380,14 +392,6 @@
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))")))
-;; Register, small constant or single bit constant for use in
-;; bseti/binvi.
-(define_predicate "arith_or_zbs_operand"
- (ior (match_operand 0 "const_arith_operand")
- (match_operand 0 "register_operand")
- (and (match_test "TARGET_ZBS")
- (match_operand 0 "single_bit_mask_operand"))))
-
(define_predicate "not_single_bit_mask_operand"
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
@@ -685,3 +689,11 @@
(and (match_operand 0 "register_operand")
(match_test "REGNO (op) == RETURN_ADDR_REGNUM
|| REGNO (op) == T0_REGNUM")))
+
+(define_predicate "bitpos_mask_operand"
+ (and (match_code "const_int")
+ (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31")))
+
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (match_operand 0 "register_operand")))
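
The prefetch_operand predicate above (paired with the new "Q" constraint in
constraints.md) describes the addresses a prefetch can encode directly: a
bare base register, or base plus a small constant offset whose low four bits
are clear.  A minimal C sketch of code that produces such addresses follows;
__builtin_prefetch is the long-standing generic GCC builtin, not something
added by this patch.

/* Addresses of the shape the predicate accepts.  */
void warm_lines (const char *p)
{
  __builtin_prefetch (p);        /* REG                           */
  __builtin_prefetch (p + 64);   /* REG+D with D % 16 == 0        */
  __builtin_prefetch (p + 2032); /* largest simm12 multiple of 16 */
}
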
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 7912b10..d2c0af3 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -36,10 +36,10 @@ along with GCC; see the file COPYING3. If not see
struct pragma_intrinsic_flags
{
- int intrinsic_target_flags;
+ int intrinsic_riscv_isa_flags;
int intrinsic_riscv_vector_elen_flags;
- int intrinsic_riscv_zvl_flags;
+ int intrinsic_riscv_zvl_subext;
int intrinsic_riscv_zvb_subext;
int intrinsic_riscv_zvk_subext;
};
@@ -47,16 +47,16 @@ struct pragma_intrinsic_flags
static void
riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags)
{
- flags->intrinsic_target_flags = target_flags;
+ flags->intrinsic_riscv_isa_flags = riscv_isa_flags;
flags->intrinsic_riscv_vector_elen_flags = riscv_vector_elen_flags;
- flags->intrinsic_riscv_zvl_flags = riscv_zvl_flags;
+ flags->intrinsic_riscv_zvl_subext = riscv_zvl_subext;
flags->intrinsic_riscv_zvb_subext = riscv_zvb_subext;
flags->intrinsic_riscv_zvk_subext = riscv_zvk_subext;
- target_flags = target_flags
+ riscv_isa_flags = riscv_isa_flags
| MASK_VECTOR;
- riscv_zvl_flags = riscv_zvl_flags
+ riscv_zvl_subext = riscv_zvl_subext
| MASK_ZVL32B
| MASK_ZVL64B
| MASK_ZVL128B
@@ -97,10 +97,10 @@ riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags)
static void
riscv_pragma_intrinsic_flags_restore (struct pragma_intrinsic_flags *flags)
{
- target_flags = flags->intrinsic_target_flags;
+ riscv_isa_flags = flags->intrinsic_riscv_isa_flags;
riscv_vector_elen_flags = flags->intrinsic_riscv_vector_elen_flags;
- riscv_zvl_flags = flags->intrinsic_riscv_zvl_flags;
+ riscv_zvl_subext = flags->intrinsic_riscv_zvl_subext;
riscv_zvb_subext = flags->intrinsic_riscv_zvb_subext;
riscv_zvk_subext = flags->intrinsic_riscv_zvk_subext;
}
@@ -239,26 +239,22 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
size_t max_ext_len = 0;
/* Figure out the max length of extension name for reserving buffer. */
- for (const riscv_subset_t *subset = subset_list->begin ();
- subset != subset_list->end ();
- subset = subset->next)
- max_ext_len = MAX (max_ext_len, subset->name.length ());
+ for (auto &subset : *subset_list)
+ max_ext_len = MAX (max_ext_len, subset.name.length ());
char *buf = (char *)alloca (max_ext_len + 10 /* For __riscv_ and '\0'. */);
- for (const riscv_subset_t *subset = subset_list->begin ();
- subset != subset_list->end ();
- subset = subset->next)
+ for (auto &subset : *subset_list)
{
- int version_value = riscv_ext_version_value (subset->major_version,
- subset->minor_version);
+ int version_value = riscv_ext_version_value (subset.major_version,
+ subset.minor_version);
/* Special rule for zicsr and zifencei, it's used for ISA spec 2.2 or
earlier. */
- if ((subset->name == "zicsr" || subset->name == "zifencei")
+ if ((subset.name == "zicsr" || subset.name == "zifencei")
&& version_value == 0)
version_value = riscv_ext_version_value (2, 0);
- sprintf (buf, "__riscv_%s", subset->name.c_str ());
+ sprintf (buf, "__riscv_%s", subset.name.c_str ());
builtin_define_with_int_value (buf, version_value);
}
}
@@ -279,7 +275,8 @@ riscv_pragma_intrinsic (cpp_reader *)
const char *name = TREE_STRING_POINTER (x);
if (strcmp (name, "vector") == 0
- || strcmp (name, "xtheadvector") == 0)
+ || strcmp (name, "xtheadvector") == 0
+ || strcmp (name, "xsfvcp") == 0)
{
struct pragma_intrinsic_flags backup_flags;
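
The flag save/restore rework above exists so that `#pragma riscv intrinsic`
can temporarily enable the vector (and now xsfvcp) ISA flags while intrinsic
headers are parsed, then put the original flags back.  A minimal sketch of
the consumer side, assuming the pragma spelling matches what GCC's
riscv_vector.h already uses:

/* Inside a header that declares intrinsics.  */
#pragma riscv intrinsic "vector"
/* ... intrinsic prototypes are declared here, parsed with the vector
   flags enabled; the pragma handler restores the flags afterwards.  */
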
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 2918496..2096c00 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -33,6 +33,7 @@
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
#endif
+RISCV_TUNE("generic", generic, generic_tune_info)
RISCV_TUNE("rocket", generic, rocket_tune_info)
RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
@@ -41,9 +42,17 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info)
RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info)
RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info)
RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
+RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info)
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
+RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info)
#undef RISCV_TUNE
@@ -93,6 +102,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"thead-c906")
+RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c908")
+RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+			   "zvfh_sstc_svinval_svnapot_svpbmt_xtheadba_"
+ "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
+ "xtheadfmemidx_xtheadmac_xtheadmemidx_"
+ "xtheadmempair_xtheadsync_xtheadvdot",
+ "xt-c908")
+RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910")
+RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
+			   "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910v2")
+RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync_"
+ "xtheadvector",
+ "xt-c910")
+RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
+			   "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
+ "svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_"
+ "xtheadsync_xtheadvdot",
+ "xt-c920v2")
+
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
"ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_"
"zifencei_zihintntl_zihintpause_zimop_za64rs_"
@@ -104,4 +155,21 @@ RISCV_CORE("xiangshan-nanhu", "rv64imafdc_zba_zbb_zbc_zbs_"
"zbkb_zbkc_zbkx_zknd_zkne_zknh_zksed_zksh_"
"svinval_zicbom_zicboz",
"xiangshan-nanhu")
+
+RISCV_CORE("xiangshan-kunminghu", "rv64imafdcbvh_sdtrig_sha_shcounterenw_"
+ "shgatpa_shlcofideleg_shtvala_shvsatpa_shvstvala_shvstvecd_"
+ "smaia_smcsrind_smdbltrp_smmpm_smnpm_smrnmi_smstateen_"
+ "ssaia_ssccptr_sscofpmf_sscounterenw_sscsrind_ssdbltrp_"
+ "ssnpm_sspm_ssstateen_ssstrict_sstc_sstvala_sstvecd_"
+ "ssu64xl_supm_svade_svbare_svinval_svnapot_svpbmt_za64rs_"
+ "zacas_zawrs_zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zcb_zcmop_"
+ "zfa_zfh_zfhmin_zic64b_zicbom_zicbop_zicboz_ziccif_"
+ "zicclsm_ziccrse_zicntr_zicond_zicsr_zifencei_zihintpause_"
+ "zihpm_zimop_zkn_zknd_zkne_zknh_zksed_zksh_zkt_zvbb_zvfh_"
+ "zvfhmin_zvkt_zvl128b_zvl32b_zvl64b",
+ "xiangshan-kunminghu")
+
+RISCV_CORE("mips-p8700", "rv64imafd_zicsr_zmmul_"
+ "zaamo_zalrsc_zba_zbb",
+ "mips-p8700")
#undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-ext-corev.def b/gcc/config/riscv/riscv-ext-corev.def
new file mode 100644
index 0000000..eb97399
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-corev.def
@@ -0,0 +1,87 @@
+/* CORE-V extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything here.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvalu,
+ /* UPPERCAE_NAME */ XCVALU,
+ /* FULL_NAME */ "Core-V miscellaneous ALU extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvbi,
+ /* UPPERCAE_NAME */ XCVBI,
+ /* FULL_NAME */ "xcvbi extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvelw,
+ /* UPPERCAE_NAME */ XCVELW,
+ /* FULL_NAME */ "Core-V event load word extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvmac,
+ /* UPPERCAE_NAME */ XCVMAC,
+ /* FULL_NAME */ "Core-V multiply-accumulate extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvsimd,
+ /* UPPERCAE_NAME */ XCVSIMD,
+ /* FULL_NAME */ "xcvsimd extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-sifive.def b/gcc/config/riscv/riscv-ext-sifive.def
new file mode 100644
index 0000000..c8d79da
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-sifive.def
@@ -0,0 +1,87 @@
+/* SiFive extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything here.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfcease,
+ /* UPPERCAE_NAME */ XSFCEASE,
+ /* FULL_NAME */ "xsfcease extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvcp,
+ /* UPPERCAE_NAME */ XSFVCP,
+ /* FULL_NAME */ "xsfvcp extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvfnrclipxfqf,
+ /* UPPERCAE_NAME */ XSFVFNRCLIPXFQF,
+ /* FULL_NAME */ "xsfvfnrclipxfqf extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvqmaccdod,
+ /* UPPERCAE_NAME */ XSFVQMACCDOD,
+ /* FULL_NAME */ "xsfvqmaccdod extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvqmaccqoq,
+ /* UPPERCAE_NAME */ XSFVQMACCQOQ,
+ /* FULL_NAME */ "xsfvqmaccqoq extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-thead.def b/gcc/config/riscv/riscv-ext-thead.def
new file mode 100644
index 0000000..327d2ae
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-thead.def
@@ -0,0 +1,191 @@
+/* T-head extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything here.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadba,
+ /* UPPERCAE_NAME */ XTHEADBA,
+ /* FULL_NAME */ "T-head address calculation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadbb,
+ /* UPPERCAE_NAME */ XTHEADBB,
+ /* FULL_NAME */ "T-head basic bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadbs,
+ /* UPPERCAE_NAME */ XTHEADBS,
+ /* FULL_NAME */ "T-head single-bit instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadcmo,
+ /* UPPERCAE_NAME */ XTHEADCMO,
+ /* FULL_NAME */ "T-head cache management operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadcondmov,
+ /* UPPERCAE_NAME */ XTHEADCONDMOV,
+ /* FULL_NAME */ "T-head conditional move extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadfmemidx,
+ /* UPPERCAE_NAME */ XTHEADFMEMIDX,
+ /* FULL_NAME */ "T-head indexed memory operations for floating-point registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadfmv,
+ /* UPPERCAE_NAME */ XTHEADFMV,
+ /* FULL_NAME */ "T-head double floating-point high-bit data transmission extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadint,
+ /* UPPERCAE_NAME */ XTHEADINT,
+ /* FULL_NAME */ "T-head acceleration interruption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmac,
+ /* UPPERCAE_NAME */ XTHEADMAC,
+ /* FULL_NAME */ "T-head multiply-accumulate extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmemidx,
+ /* UPPERCAE_NAME */ XTHEADMEMIDX,
+ /* FULL_NAME */ "T-head indexed memory operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmempair,
+ /* UPPERCAE_NAME */ XTHEADMEMPAIR,
+ /* FULL_NAME */ "T-head two-GPR memory operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadsync,
+ /* UPPERCAE_NAME */ XTHEADSYNC,
+ /* FULL_NAME */ "T-head multi-core synchronization extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadvector,
+ /* UPPERCAE_NAME */ XTHEADVECTOR,
+ /* FULL_NAME */ "xtheadvector extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-ventana.def b/gcc/config/riscv/riscv-ext-ventana.def
new file mode 100644
index 0000000..deed47f
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-ventana.def
@@ -0,0 +1,35 @@
+/* Ventana extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything here.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xventanacondops,
+ /* UPPERCAE_NAME */ XVENTANACONDOPS,
+ /* FULL_NAME */ "Ventana integer conditional operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xventana,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def
new file mode 100644
index 0000000..816acaa
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.def
@@ -0,0 +1,2084 @@
+/* RISC-V extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after changing anything here.
+
+Format of DEFINE_RISCV_EXT:
+ NAME:
+ The name of the extension, e.g. "i".
+ UPPERCASE_NAME:
+ The name of the extension in uppercase, e.g. "ZBA"; this is used to
+ generate the TARGET_<ext-name> macro and the MASK_<ext-name> macro.
+ Extensions named with only a single letter should also be prefixed with
+ 'RV', e.g. 'v' should use 'RVV' here.
+ Some older extensions like 'i' and 'm' do not follow this rule.
+ FULL_NAME:
+ The full name of the extension, e.g. "Base integer extension".
+ DESC:
+ A short description of the extension; this is used when generating
+ documentation. GNU Texinfo markup can be used in this field.
+ URL:
+ A URL for the extension.
+ DEP_EXTS:
+ A list of dependent extensions; each entry is either a string or
+ a tuple. The first element of a tuple is the name
+ of the extension and the second element is a function that
+ takes a subset_list and returns true if the extension should be added as
+ a dependent extension; `c` and `zca` are examples of this.
+ SUPPORTED_VERSIONS:
+ A list of tuples; each tuple contains the major version number, the minor
+ version number and the class of the specification. The version number is a
+ list of integers, e.g. {2, 0} for version 2.0. The class is
+ a string, e.g. "ISA_SPEC_CLASS_20191213"; the specification class is not
+ required for any new extension.
+ FLAG_GROUP:
+ The group of the extension; this is used to group extensions
+ together. The group is a string, e.g. "base", "zi", "zm", "za", "zf",
+ "zc", "zb" or "zk".
+ In theory this should be auto-generated some day...
+ BITMASK_GROUP_ID:
+ The group id of the extension for __riscv_feature_bits.
+ This field should stay in sync with riscv-c-api-doc; keep it at
+ BITMASK_NOT_YET_ALLOCATED if no value has been allocated yet.
+ https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions
+ BITMASK_BIT_POSITION:
+ The bit position of the extension for __riscv_feature_bits.
+ This field should stay in sync with riscv-c-api-doc; keep it at
+ BITMASK_NOT_YET_ALLOCATED if no value has been allocated yet.
+ https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions
+ EXTRA_EXTENSION_FLAGS:
+ Extra flags for the extension; this is a bitmask of the
+ extra flags. The extra flags are:
+ - EXT_FLAG_MACRO: Set this flag if the extension is just a macro for a set
+ of other extensions and does not define any new instructions, CSRs or
+ behaviors; for example, the `b` extension is just a macro for `zba`, `zbb`
+ and `zbs`.
+*/
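+
+/* Illustration only (an editorial sketch, not part of the upstream file):
+   like other GCC .def files, this file is meant to be consumed by defining
+   DEFINE_RISCV_EXT before including it.  A hypothetical consumer that only
+   wants the extension names (ext_names is a made-up identifier) could do:
+
+     #define DEFINE_RISCV_EXT(NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL,  \
+                              DEP_EXTS, SUPPORTED_VERSIONS, FLAG_GROUP,    \
+                              BITMASK_GROUP_ID, BITMASK_BIT_POSITION,      \
+                              EXTRA_EXTENSION_FLAGS)                       \
+       #NAME,
+     static const char *const ext_names[] = {
+     #include "riscv-ext.def"
+     };
+     #undef DEFINE_RISCV_EXT
+
+   The parentheses around DEP_EXTS and SUPPORTED_VERSIONS keep their embedded
+   commas inside a single macro argument; the real consumers in the RISC-V
+   backend expand these entries into richer tables.  */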
+
+DEFINE_RISCV_EXT(
+ /* NAME */ e,
+ /* UPPERCASE_NAME */ RVE,
+ /* FULL_NAME */ "Reduced base integer extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ i,
+ /* UPPERCASE_NAME */ RVI,
+ /* FULL_NAME */ "Base integer extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213},
+ {2, 1, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 8,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ m,
+ /* UPPERCASE_NAME */ MUL,
+ /* FULL_NAME */ "Integer multiplication and division extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zmmul"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 12,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ a,
+ /* UPPERCASE_NAME */ ATOMIC,
+ /* FULL_NAME */ "Atomic extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo", "zalrsc"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213},
+ {2, 0, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ f,
+ /* UPPERCASE_NAME */ HARD_FLOAT,
+ /* FULL_NAME */ "Single-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213},
+ {2, 2, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 5,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ d,
+ /* UPPERCASE_NAME */ DOUBLE_FLOAT,
+ /* FULL_NAME */ "Double-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213},
+ {2, 2, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 3,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ c,
+ /* UPPERCASE_NAME */ RVC,
+ /* FULL_NAME */ "Compressed extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca",
+ {"zcf",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->xlen () == 32
+ && subset_list->lookup ("f");
+ }},
+ {"zcd",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->lookup ("d");
+ }}}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 2,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ b,
+ /* UPPERCASE_NAME */ RVB,
+ /* FULL_NAME */ "b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zba", "zbb", "zbs"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ v,
+ /* UPPERCASE_NAME */ RVV,
+ /* FULL_NAME */ "Vector extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl128b", "zve64d"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 21,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ h,
+ /* UPPERCASE_NAME */ RVH,
+ /* FULL_NAME */ "Hypervisor extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zic64b,
+ /* UPPERCASE_NAME */ ZIC64B,
+ /* FULL_NAME */ "Cache block size isf 64 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicbom,
+ /* UPPERCASE_NAME */ ZICBOM,
+ /* FULL_NAME */ "Cache-block management extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicbop,
+ /* UPPERCASE_NAME */ ZICBOP,
+ /* FULL_NAME */ "Cache-block prefetch extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicboz,
+ /* UPPERCASE_NAME */ ZICBOZ,
+ /* FULL_NAME */ "Cache-block zero extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 37,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccamoa,
+ /* UPPERCASE_NAME */ ZICCAMOA,
+ /* FULL_NAME */ "Main memory supports all atomics in A",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccif,
+ /* UPPERCASE_NAME */ ZICCIF,
+ /* FULL_NAME */ "Main memory supports instruction fetch with atomicity requirement",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicclsm,
+ /* UPPERCASE_NAME */ ZICCLSM,
+ /* FULL_NAME */ "Main memory supports misaligned loads/stores",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccrse,
+ /* UPPERCASE_NAME */ ZICCRSE,
+ /* FULL_NAME */ "Main memory supports forward progress on LR/SC sequences",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicfilp,
+ /* UPPERCASE_NAME */ ZICFILP,
+ /* FULL_NAME */ "zicfilp extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicfiss,
+ /* UPPERCASE_NAME */ ZICFISS,
+ /* FULL_NAME */ "zicfiss extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "zimop"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicntr,
+ /* UPPERCASE_NAME */ ZICNTR,
+ /* FULL_NAME */ "Standard extension for base counters and timers",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicond,
+ /* UPPERCASE_NAME */ ZICOND,
+ /* FULL_NAME */ "Integer conditional operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 38,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicsr,
+ /* UPPERCASE_NAME */ ZICSR,
+ /* FULL_NAME */ "Control and status register access extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zifencei,
+ /* UPPERCASE_NAME */ ZIFENCEI,
+ /* FULL_NAME */ "Instruction-fetch fence extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihintntl,
+ /* UPPERCASE_NAME */ ZIHINTNTL,
+ /* FULL_NAME */ "Non-temporal locality hints extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 39,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihintpause,
+ /* UPPERCASE_NAME */ ZIHINTPAUSE,
+ /* FULL_NAME */ "Pause hint extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 40,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihpm,
+ /* UPPERCASE_NAME */ ZIHPM,
+ /* FULL_NAME */ "Standard extension for hardware performance counters",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zimop,
+ /* UPPERCASE_NAME */ ZIMOP,
+ /* FULL_NAME */ "zimop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 1,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zilsd,
+ /* UPPERCASE_NAME */ ZILSD,
+ /* FULL_NAME */ "Load/Store pair instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 1,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zmmul,
+ /* UPPERCASE_NAME */ ZMMUL,
+ /* FULL_NAME */ "Integer multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ za128rs,
+ /* UPPERCASE_NAME */ ZA128RS,
+ /* FULL_NAME */ "Reservation set size of 128 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ za64rs,
+ /* UPPERCASE_NAME */ ZA64RS,
+ /* FULL_NAME */ "Reservation set size of 64 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zaamo,
+ /* UPPERCASE_NAME */ ZAAMO,
+ /* FULL_NAME */ "zaamo extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zabha,
+ /* UPPERCASE_NAME */ ZABHA,
+ /* FULL_NAME */ "zabha extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zacas,
+ /* UPPERCASE_NAME */ ZACAS,
+ /* FULL_NAME */ "zacas extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 26,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zalrsc,
+ /* UPPERCASE_NAME */ ZALRSC,
+ /* FULL_NAME */ "zalrsc extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zawrs,
+ /* UPPERCASE_NAME */ ZAWRS,
+ /* FULL_NAME */ "Wait-on-reservation-set extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zalrsc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 7,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zama16b,
+ /* UPPERCASE_NAME */ ZAMA16B,
+ /* FULL_NAME */ "Zama16b extension",
+ /* DESC */ "Misaligned loads, stores, and AMOs to main memory regions that do"
+ " not cross a naturally aligned 16-byte boundary are atomic.",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfa,
+ /* UPPERCASE_NAME */ ZFA,
+ /* FULL_NAME */ "Additional floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 34,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfbfmin,
+ /* UPPERCASE_NAME */ ZFBFMIN,
+ /* FULL_NAME */ "zfbfmin extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfh,
+ /* UPPERCASE_NAME */ ZFH,
+ /* FULL_NAME */ "Half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 35,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfhmin,
+ /* UPPERCASE_NAME */ ZFHMIN,
+ /* FULL_NAME */ "Minimal half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 36,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfinx,
+ /* UPPERCASE_NAME */ ZFINX,
+ /* FULL_NAME */ "Single-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zdinx,
+ /* UPPERCASE_NAME */ ZDINX,
+ /* FULL_NAME */ "Double-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfinx", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zca,
+ /* UPPERCASE_NAME */ ZCA,
+ /* FULL_NAME */ "Integer compressed instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({{"c",
+[] (const riscv_subset_list *subset_list) -> bool
+{
+ /* For RV32 Zca implies C for one of these combinations of
+ extensions: Zca, F_Zca_Zcf and FD_Zca_Zcf_Zcd. */
+ if (subset_list->xlen () == 32)
+ {
+ if (subset_list->lookup ("d"))
+ return subset_list->lookup ("zcf") && subset_list->lookup ("zcd");
+
+ if (subset_list->lookup ("f"))
+ return subset_list->lookup ("zcf");
+
+ return true;
+ }
+
+ /* For RV64 Zca implies C for one of these combinations of
+ extensions: Zca and FD_Zca_Zcd (Zcf is not available
+ for RV64). */
+ if (subset_list->xlen () == 64)
+ {
+ if (subset_list->lookup ("d"))
+ return subset_list->lookup ("zcd");
+
+ return true;
+ }
+
+ /* Do nothing for future RV128 specification. Behaviour
+ for this case is not yet well defined. */
+ return false;
+
+}}}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 2,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcb,
+ /* UPPERCASE_NAME */ ZCB,
+ /* FULL_NAME */ "Simple compressed instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 3,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcd,
+ /* UPPERCASE_NAME */ ZCD,
+ /* FULL_NAME */ "Compressed double-precision floating point loads and stores extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 4,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zce,
+ /* UPPERCASE_NAME */ ZCE,
+ /* FULL_NAME */ "Compressed instruction extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zcb", "zcmp", "zcmt",
+ {"zcf",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->xlen () == 32
+ && subset_list->lookup ("f");
+ }}}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcf,
+ /* UPPERCASE_NAME */ ZCF,
+ /* FULL_NAME */ "Compressed single-precision floating point loads and stores extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 5,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmop,
+ /* UPPERCASE_NAME */ ZCMOP,
+ /* FULL_NAME */ "zcmop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 6,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmp,
+ /* UPPERCASE_NAME */ ZCMP,
+ /* FULL_NAME */ "Compressed push pop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmt,
+ /* UPPERCASE_NAME */ ZCMT,
+ /* FULL_NAME */ "Table jump instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zclsd,
+ /* UPPERCASE_NAME */ ZCLSD,
+ /* FULL_NAME */ "Compressed load/store pair instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zilsd"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zba,
+ /* UPPERCASE_NAME */ ZBA,
+ /* FULL_NAME */ "Address calculation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 27,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbb,
+ /* UPPERCASE_NAME */ ZBB,
+ /* FULL_NAME */ "Basic bit manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 28,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbc,
+ /* UPPERCASE_NAME */ ZBC,
+ /* FULL_NAME */ "Carry-less multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 29,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkb,
+ /* UPPERCASE_NAME */ ZBKB,
+ /* FULL_NAME */ "Cryptography bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 30,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkc,
+ /* UPPERCASE_NAME */ ZBKC,
+ /* FULL_NAME */ "Cryptography carry-less multiply extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 31,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkx,
+ /* UPPERCASE_NAME */ ZBKX,
+ /* FULL_NAME */ "Cryptography crossbar permutation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 32,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbs,
+ /* UPPERCASE_NAME */ ZBS,
+ /* FULL_NAME */ "Single-bit operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 33,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zk,
+ /* UPPERCASE_NAME */ ZK,
+ /* FULL_NAME */ "Standard scalar cryptography extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zkn", "zkr", "zkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkn,
+ /* UPPERCASE_NAME */ ZKN,
+ /* FULL_NAME */ "NIST algorithm suite extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zknd,
+ /* UPPERCASE_NAME */ ZKND,
+ /* FULL_NAME */ "AES Decryption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 41,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkne,
+ /* UPPERCASE_NAME */ ZKNE,
+ /* FULL_NAME */ "AES Encryption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 42,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zknh,
+ /* UPPERCASE_NAME */ ZKNH,
+ /* FULL_NAME */ "Hash function extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 43,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkr,
+ /* UPPERCASE_NAME */ ZKR,
+ /* FULL_NAME */ "Entropy source extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zks,
+ /* UPPERCASE_NAME */ ZKS,
+ /* FULL_NAME */ "ShangMi algorithm suite extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zksed", "zksh"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zksed,
+ /* UPPERCASE_NAME */ ZKSED,
+ /* FULL_NAME */ "SM4 block cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 44,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zksh,
+ /* UPPERCASE_NAME */ ZKSH,
+ /* FULL_NAME */ "SM3 hash function extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 45,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkt,
+ /* UPPERCASE_NAME */ ZKT,
+ /* FULL_NAME */ "Data independent execution latency extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 46,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ztso,
+ /* UPPERCASE_NAME */ ZTSO,
+ /* FULL_NAME */ "Total store ordering extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zt,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 47,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvbb,
+ /* UPPERCASE_NAME */ ZVBB,
+ /* FULL_NAME */ "Vector basic bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkb"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 48,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvbc,
+ /* UPPERCASE_NAME */ ZVBC,
+ /* FULL_NAME */ "Vector carryless multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve64x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 49,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve32f,
+ /* UPPERCASE_NAME */ ZVE32F,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zve32x", "zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 61,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve32x,
+ /* UPPERCASE_NAME */ ZVE32X,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 60,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64d,
+ /* UPPERCASE_NAME */ ZVE64D,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"d", "zve64f", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64f,
+ /* UPPERCASE_NAME */ ZVE64F,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zve32f", "zve64x", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 63,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64x,
+ /* UPPERCASE_NAME */ ZVE64X,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 62,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfbfmin,
+ /* UPPERCASE_NAME */ ZVFBFMIN,
+ /* FULL_NAME */ "Vector BF16 converts extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfbfwma,
+ /* UPPERCASE_NAME */ ZVFBFWMA,
+ /* FULL_NAME */ "zvfbfwma extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvfbfmin", "zfbfmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfh,
+ /* UPPERCASE_NAME */ ZVFH,
+ /* FULL_NAME */ "Vector half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f", "zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 50,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfhmin,
+ /* UPPERCASE_NAME */ ZVFHMIN,
+ /* FULL_NAME */ "Vector minimal half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 51,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkb,
+ /* UPPERCASE_NAME */ ZVKB,
+ /* FULL_NAME */ "Vector cryptography bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 52,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkg,
+ /* UPPERCASE_NAME */ ZVKG,
+ /* FULL_NAME */ "Vector GCM/GMAC extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 53,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkn,
+ /* UPPERCASE_NAME */ ZVKN,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite extension",
+ /* DESC */ "@samp{zvkn} will expand to",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkned", "zvknhb", "zvkb", "zvkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknc,
+ /* UPPERCASE_NAME */ ZVKNC,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite with carryless multiply extension, @samp{zvknc}",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkn", "zvbc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkned,
+ /* UPPERCASE_NAME */ ZVKNED,
+ /* FULL_NAME */ "Vector AES block cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 54,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkng,
+ /* UPPERCASE_NAME */ ZVKNG,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite with GCM extension, @samp{zvkng} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkn", "zvkg"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknha,
+ /* UPPERCASE_NAME */ ZVKNHA,
+ /* FULL_NAME */ "Vector SHA-2 secure hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 55,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknhb,
+ /* UPPERCASE_NAME */ ZVKNHB,
+ /* FULL_NAME */ "Vector SHA-2 secure hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve64x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 56,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvks,
+ /* UPPERCASE_NAME */ ZVKS,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite extension, @samp{zvks} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvksed", "zvksh", "zvkb", "zvkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksc,
+ /* UPPERCASE_NAME */ ZVKSC,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite with carryless multiplication extension,",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvks", "zvbc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksed,
+ /* UPPERCASE_NAME */ ZVKSED,
+ /* FULL_NAME */ "Vector SM4 Block Cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 57,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksg,
+ /* UPPERCASE_NAME */ ZVKSG,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite with GCM extension, @samp{zvksg} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvks", "zvkg"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksh,
+ /* UPPERCASE_NAME */ ZVKSH,
+ /* FULL_NAME */ "Vector SM3 Secure Hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 58,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkt,
+ /* UPPERCASE_NAME */ ZVKT,
+ /* FULL_NAME */ "Vector data independent execution latency extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 59,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl1024b,
+ /* UPPERCASE_NAME */ ZVL1024B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl512b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl128b,
+ /* UPPERCASE_NAME */ ZVL128B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl16384b,
+ /* UPPERCASE_NAME */ ZVL16384B,
+ /* FULL_NAME */ "zvl16384b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl8192b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl2048b,
+ /* UPPERCASE_NAME */ ZVL2048B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl1024b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl256b,
+ /* UPPERCASE_NAME */ ZVL256B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl128b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl32768b,
+ /* UPPERCASE_NAME */ ZVL32768B,
+ /* FULL_NAME */ "zvl32768b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl16384b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl32b,
+ /* UPPERCASE_NAME */ ZVL32B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl4096b,
+ /* UPPERCASE_NAME */ ZVL4096B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl2048b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl512b,
+ /* UPPERCASE_NAME */ ZVL512B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl256b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl64b,
+ /* UPPERCASE_NAME */ ZVL64B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl65536b,
+ /* UPPERCASE_NAME */ ZVL65536B,
+ /* FULL_NAME */ "zvl65536b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl32768b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl8192b,
+ /* UPPERCASE_NAME */ ZVL8192B,
+ /* FULL_NAME */ "zvl8192b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl4096b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zhinx,
+ /* UPPERCASE_NAME */ ZHINX,
+ /* FULL_NAME */ "Half-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zhinxmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zhinxmin,
+ /* UPPERCASE_NAME */ ZHINXMIN,
+ /* FULL_NAME */ "Minimal half-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfinx"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sdtrig,
+ /* UPPERCASE_NAME */ SDTRIG,
+ /* FULL_NAME */ "sdtrig extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sd,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sha,
+ /* UPPERCASE_NAME */ SHA,
+ /* FULL_NAME */ "The augmented hypervisor extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "shcounterenw", "shgatpa", "shtvala", "shvstvala", "shvstvecd", "shvsatpa", "ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shcounterenw,
+ /* UPPERCASE_NAME */ SHCOUNTERENW,
+ /* FULL_NAME */ "Support writeable enables for any supported counter",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "zihpm"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shgatpa,
+ /* UPPERCASE_NAME */ SHGATPA,
+ /* FULL_NAME */ "SvNNx4 mode supported for all modes supported by satp",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shlcofideleg,
+ /* UPPERCASE_NAME */ SHLCOFIDELEG,
+ /* FULL_NAME */ "Delegating LCOFI interrupts to VS-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shtvala,
+ /* UPPERCASE_NAME */ SHTVALA,
+ /* FULL_NAME */ "The htval register provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvstvala,
+ /* UPPERCASE_NAME */ SHVSTVALA,
+ /* FULL_NAME */ "The vstval register provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvstvecd,
+ /* UPPERCASE_NAME */ SHVSTVECD,
+ /* FULL_NAME */ "The vstvec register supports Direct mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvsatpa,
+ /* UPPERCASE_NAME */ SHVSATPA,
+ /* FULL_NAME */ "The vsatp register supports all modes supported by satp",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smaia,
+ /* UPPERCASE_NAME */ SMAIA,
+ /* FULL_NAME */ "Advanced interrupt architecture extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"ssaia"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smcntrpmf,
+ /* UPPERCASE_NAME */ SMCNTRPMF,
+ /* FULL_NAME */ "Cycle and instret privilege mode filtering",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smcsrind,
+ /* UPPERCASE_NAME */ SMCSRIND,
+ /* FULL_NAME */ "Machine-Level Indirect CSR Access",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "sscsrind"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smepmp,
+ /* UPPERCASE_NAME */ SMEPMP,
+ /* FULL_NAME */ "PMP Enhancements for memory access and execution prevention on Machine mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smmpm,
+ /* UPPERCASE_NAME */ SMMPM,
+ /* FULL_NAME */ "smmpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smnpm,
+ /* UPPERCASE_NAME */ SMNPM,
+ /* FULL_NAME */ "smnpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smrnmi,
+ /* UPPERCASE_NAME */ SMRNMI,
+ /* FULL_NAME */ "Resumable non-maskable interrupts",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smstateen,
+ /* UPPERCASE_NAME */ SMSTATEEN,
+ /* FULL_NAME */ "State enable extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smdbltrp,
+ /* UPPERCASE_NAME */ SMDBLTRP,
+ /* FULL_NAME */ "Double Trap Extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssaia,
+ /* UPPERCASE_NAME */ SSAIA,
+ /* FULL_NAME */ "Advanced interrupt architecture extension for supervisor-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssccptr,
+ /* UPPERCASE_NAME */ SSCCPTR,
+ /* FULL_NAME */ "Main memory supports page table reads",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscofpmf,
+ /* UPPERCASE_NAME */ SSCOFPMF,
+ /* FULL_NAME */ "Count overflow & filtering extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscounterenw,
+ /* UPPERCASE_NAME */ SSCOUNTERENW,
+ /* FULL_NAME */ "Support writeable enables for any supported counter",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscsrind,
+ /* UPPERCASE_NAME */ SSCSRIND,
+ /* FULL_NAME */ "Supervisor-Level Indirect CSR Access",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssnpm,
+ /* UPPERCASE_NAME */ SSNPM,
+ /* FULL_NAME */ "ssnpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sspm,
+ /* UPPERCASE_NAME */ SSPM,
+ /* FULL_NAME */ "sspm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssstateen,
+ /* UPPERCASE_NAME */ SSSTATEEN,
+ /* FULL_NAME */ "State-enable extension for supervisor-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstc,
+ /* UPPERCASE_NAME */ SSTC,
+ /* FULL_NAME */ "Supervisor-mode timer interrupts extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstvala,
+ /* UPPERCASE_NAME */ SSTVALA,
+ /* FULL_NAME */ "Stval provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstvecd,
+ /* UPPERCASE_NAME */ SSTVECD,
+ /* FULL_NAME */ "Stvec supports Direct mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssstrict,
+ /* UPPERCASE_NAME */ SSSTRICT,
+ /* FULL_NAME */ "ssstrict extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssdbltrp,
+ /* UPPERCASE_NAME */ SSDBLTRP,
+ /* FULL_NAME */ "Double Trap Extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssu64xl,
+ /* UPPERCASE_NAME */ SSU64XL,
+ /* FULL_NAME */ "UXLEN=64 must be supported",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ supm,
+ /* UPPERCASE_NAME */ SUPM,
+ /* FULL_NAME */ "supm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ su,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svinval,
+ /* UPPERCASE_NAME */ SVINVAL,
+ /* FULL_NAME */ "Fine-grained address-translation cache invalidation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svnapot,
+ /* UPPERCASE_NAME */ SVNAPOT,
+ /* FULL_NAME */ "NAPOT translation contiguity extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svpbmt,
+ /* UPPERCASE_NAME */ SVPBMT,
+ /* FULL_NAME */ "Page-based memory types extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svvptc,
+ /* UPPERCASE_NAME */ SVVPTC,
+ /* FULL_NAME */ "svvptc extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svadu,
+ /* UPPERCASE_NAME */ SVADU,
+ /* FULL_NAME */ "Hardware Updating of A/D Bits extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svade,
+ /* UPPERCASE_NAME */ SVADE,
+ /* FULL_NAME */ "Cause exception when hardware updating of A/D bits is disabled",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svbare,
+ /* UPPERCASE_NAME */ SVBARE,
+ /* FULL_NAME */ "Satp mode bare is supported",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+#include "riscv-ext-corev.def"
+#include "riscv-ext-sifive.def"
+#include "riscv-ext-thead.def"
+#include "riscv-ext-ventana.def"
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
new file mode 100644
index 0000000..9f8c545
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -0,0 +1,447 @@
+; Target options for the RISC-V port of the compiler
+;
+; Copyright (C) 2025 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+; This file is generated automatically using
+; gcc/config/riscv/gen-riscv-ext-opt.cc from:
+; gcc/config/riscv/riscv-ext.def
+
+; Please *DO NOT* edit manually.
+TargetVariable
+int riscv_base_subext
+
+TargetVariable
+int riscv_sd_subext
+
+TargetVariable
+int riscv_sh_subext
+
+TargetVariable
+int riscv_sm_subext
+
+TargetVariable
+int riscv_ss_subext
+
+TargetVariable
+int riscv_su_subext
+
+TargetVariable
+int riscv_sv_subext
+
+TargetVariable
+int riscv_xcv_subext
+
+TargetVariable
+int riscv_xsf_subext
+
+TargetVariable
+int riscv_xthead_subext
+
+TargetVariable
+int riscv_xventana_subext
+
+TargetVariable
+int riscv_za_subext
+
+TargetVariable
+int riscv_zb_subext
+
+TargetVariable
+int riscv_zc_subext
+
+TargetVariable
+int riscv_zf_subext
+
+TargetVariable
+int riscv_zi_subext
+
+TargetVariable
+int riscv_zinx_subext
+
+TargetVariable
+int riscv_zk_subext
+
+TargetVariable
+int riscv_zm_subext
+
+TargetVariable
+int riscv_zt_subext
+
+TargetVariable
+int riscv_zvb_subext
+
+TargetVariable
+int riscv_zve_subext
+
+TargetVariable
+int riscv_zvf_subext
+
+TargetVariable
+int riscv_zvk_subext
+
+TargetVariable
+int riscv_zvl_subext
+
+Mask(RVE) Var(riscv_base_subext)
+
+Mask(RVI) Var(riscv_base_subext)
+
+Mask(MUL) Var(riscv_base_subext)
+
+Mask(ATOMIC) Var(riscv_base_subext)
+
+Mask(HARD_FLOAT) Var(riscv_base_subext)
+
+Mask(DOUBLE_FLOAT) Var(riscv_base_subext)
+
+Mask(RVC) Var(riscv_base_subext)
+
+Mask(RVB) Var(riscv_base_subext)
+
+Mask(RVV) Var(riscv_base_subext)
+
+Mask(RVH) Var(riscv_base_subext)
+
+Mask(ZIC64B) Var(riscv_zi_subext)
+
+Mask(ZICBOM) Var(riscv_zi_subext)
+
+Mask(ZICBOP) Var(riscv_zi_subext)
+
+Mask(ZICBOZ) Var(riscv_zi_subext)
+
+Mask(ZICCAMOA) Var(riscv_zi_subext)
+
+Mask(ZICCIF) Var(riscv_zi_subext)
+
+Mask(ZICCLSM) Var(riscv_zi_subext)
+
+Mask(ZICCRSE) Var(riscv_zi_subext)
+
+Mask(ZICFILP) Var(riscv_zi_subext)
+
+Mask(ZICFISS) Var(riscv_zi_subext)
+
+Mask(ZICNTR) Var(riscv_zi_subext)
+
+Mask(ZICOND) Var(riscv_zi_subext)
+
+Mask(ZICSR) Var(riscv_zi_subext)
+
+Mask(ZIFENCEI) Var(riscv_zi_subext)
+
+Mask(ZIHINTNTL) Var(riscv_zi_subext)
+
+Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
+
+Mask(ZIHPM) Var(riscv_zi_subext)
+
+Mask(ZIMOP) Var(riscv_zi_subext)
+
+Mask(ZILSD) Var(riscv_zi_subext)
+
+Mask(ZMMUL) Var(riscv_zm_subext)
+
+Mask(ZA128RS) Var(riscv_za_subext)
+
+Mask(ZA64RS) Var(riscv_za_subext)
+
+Mask(ZAAMO) Var(riscv_za_subext)
+
+Mask(ZABHA) Var(riscv_za_subext)
+
+Mask(ZACAS) Var(riscv_za_subext)
+
+Mask(ZALRSC) Var(riscv_za_subext)
+
+Mask(ZAWRS) Var(riscv_za_subext)
+
+Mask(ZAMA16B) Var(riscv_za_subext)
+
+Mask(ZFA) Var(riscv_zf_subext)
+
+Mask(ZFBFMIN) Var(riscv_zf_subext)
+
+Mask(ZFH) Var(riscv_zf_subext)
+
+Mask(ZFHMIN) Var(riscv_zf_subext)
+
+Mask(ZFINX) Var(riscv_zinx_subext)
+
+Mask(ZDINX) Var(riscv_zinx_subext)
+
+Mask(ZCA) Var(riscv_zc_subext)
+
+Mask(ZCB) Var(riscv_zc_subext)
+
+Mask(ZCD) Var(riscv_zc_subext)
+
+Mask(ZCE) Var(riscv_zc_subext)
+
+Mask(ZCF) Var(riscv_zc_subext)
+
+Mask(ZCMOP) Var(riscv_zc_subext)
+
+Mask(ZCMP) Var(riscv_zc_subext)
+
+Mask(ZCMT) Var(riscv_zc_subext)
+
+Mask(ZCLSD) Var(riscv_zc_subext)
+
+Mask(ZBA) Var(riscv_zb_subext)
+
+Mask(ZBB) Var(riscv_zb_subext)
+
+Mask(ZBC) Var(riscv_zb_subext)
+
+Mask(ZBKB) Var(riscv_zb_subext)
+
+Mask(ZBKC) Var(riscv_zb_subext)
+
+Mask(ZBKX) Var(riscv_zb_subext)
+
+Mask(ZBS) Var(riscv_zb_subext)
+
+Mask(ZK) Var(riscv_zk_subext)
+
+Mask(ZKN) Var(riscv_zk_subext)
+
+Mask(ZKND) Var(riscv_zk_subext)
+
+Mask(ZKNE) Var(riscv_zk_subext)
+
+Mask(ZKNH) Var(riscv_zk_subext)
+
+Mask(ZKR) Var(riscv_zk_subext)
+
+Mask(ZKS) Var(riscv_zk_subext)
+
+Mask(ZKSED) Var(riscv_zk_subext)
+
+Mask(ZKSH) Var(riscv_zk_subext)
+
+Mask(ZKT) Var(riscv_zk_subext)
+
+Mask(ZTSO) Var(riscv_zt_subext)
+
+Mask(ZVBB) Var(riscv_zvb_subext)
+
+Mask(ZVBC) Var(riscv_zvb_subext)
+
+Mask(ZVE32F) Var(riscv_zve_subext)
+
+Mask(ZVE32X) Var(riscv_zve_subext)
+
+Mask(ZVE64D) Var(riscv_zve_subext)
+
+Mask(ZVE64F) Var(riscv_zve_subext)
+
+Mask(ZVE64X) Var(riscv_zve_subext)
+
+Mask(ZVFBFMIN) Var(riscv_zvf_subext)
+
+Mask(ZVFBFWMA) Var(riscv_zvf_subext)
+
+Mask(ZVFH) Var(riscv_zvf_subext)
+
+Mask(ZVFHMIN) Var(riscv_zvf_subext)
+
+Mask(ZVKB) Var(riscv_zvk_subext)
+
+Mask(ZVKG) Var(riscv_zvk_subext)
+
+Mask(ZVKN) Var(riscv_zvk_subext)
+
+Mask(ZVKNC) Var(riscv_zvk_subext)
+
+Mask(ZVKNED) Var(riscv_zvk_subext)
+
+Mask(ZVKNG) Var(riscv_zvk_subext)
+
+Mask(ZVKNHA) Var(riscv_zvk_subext)
+
+Mask(ZVKNHB) Var(riscv_zvk_subext)
+
+Mask(ZVKS) Var(riscv_zvk_subext)
+
+Mask(ZVKSC) Var(riscv_zvk_subext)
+
+Mask(ZVKSED) Var(riscv_zvk_subext)
+
+Mask(ZVKSG) Var(riscv_zvk_subext)
+
+Mask(ZVKSH) Var(riscv_zvk_subext)
+
+Mask(ZVKT) Var(riscv_zvk_subext)
+
+Mask(ZVL1024B) Var(riscv_zvl_subext)
+
+Mask(ZVL128B) Var(riscv_zvl_subext)
+
+Mask(ZVL16384B) Var(riscv_zvl_subext)
+
+Mask(ZVL2048B) Var(riscv_zvl_subext)
+
+Mask(ZVL256B) Var(riscv_zvl_subext)
+
+Mask(ZVL32768B) Var(riscv_zvl_subext)
+
+Mask(ZVL32B) Var(riscv_zvl_subext)
+
+Mask(ZVL4096B) Var(riscv_zvl_subext)
+
+Mask(ZVL512B) Var(riscv_zvl_subext)
+
+Mask(ZVL64B) Var(riscv_zvl_subext)
+
+Mask(ZVL65536B) Var(riscv_zvl_subext)
+
+Mask(ZVL8192B) Var(riscv_zvl_subext)
+
+Mask(ZHINX) Var(riscv_zinx_subext)
+
+Mask(ZHINXMIN) Var(riscv_zinx_subext)
+
+Mask(SDTRIG) Var(riscv_sd_subext)
+
+Mask(SHA) Var(riscv_sh_subext)
+
+Mask(SHCOUNTERENW) Var(riscv_sh_subext)
+
+Mask(SHGATPA) Var(riscv_sh_subext)
+
+Mask(SHLCOFIDELEG) Var(riscv_sh_subext)
+
+Mask(SHTVALA) Var(riscv_sh_subext)
+
+Mask(SHVSTVALA) Var(riscv_sh_subext)
+
+Mask(SHVSTVECD) Var(riscv_sh_subext)
+
+Mask(SHVSATPA) Var(riscv_sh_subext)
+
+Mask(SMAIA) Var(riscv_sm_subext)
+
+Mask(SMCNTRPMF) Var(riscv_sm_subext)
+
+Mask(SMCSRIND) Var(riscv_sm_subext)
+
+Mask(SMEPMP) Var(riscv_sm_subext)
+
+Mask(SMMPM) Var(riscv_sm_subext)
+
+Mask(SMNPM) Var(riscv_sm_subext)
+
+Mask(SMRNMI) Var(riscv_sm_subext)
+
+Mask(SMSTATEEN) Var(riscv_sm_subext)
+
+Mask(SMDBLTRP) Var(riscv_sm_subext)
+
+Mask(SSAIA) Var(riscv_ss_subext)
+
+Mask(SSCCPTR) Var(riscv_ss_subext)
+
+Mask(SSCOFPMF) Var(riscv_ss_subext)
+
+Mask(SSCOUNTERENW) Var(riscv_ss_subext)
+
+Mask(SSCSRIND) Var(riscv_ss_subext)
+
+Mask(SSNPM) Var(riscv_ss_subext)
+
+Mask(SSPM) Var(riscv_ss_subext)
+
+Mask(SSSTATEEN) Var(riscv_ss_subext)
+
+Mask(SSTC) Var(riscv_ss_subext)
+
+Mask(SSTVALA) Var(riscv_ss_subext)
+
+Mask(SSTVECD) Var(riscv_ss_subext)
+
+Mask(SSSTRICT) Var(riscv_ss_subext)
+
+Mask(SSDBLTRP) Var(riscv_ss_subext)
+
+Mask(SSU64XL) Var(riscv_ss_subext)
+
+Mask(SUPM) Var(riscv_su_subext)
+
+Mask(SVINVAL) Var(riscv_sv_subext)
+
+Mask(SVNAPOT) Var(riscv_sv_subext)
+
+Mask(SVPBMT) Var(riscv_sv_subext)
+
+Mask(SVVPTC) Var(riscv_sv_subext)
+
+Mask(SVADU) Var(riscv_sv_subext)
+
+Mask(SVADE) Var(riscv_sv_subext)
+
+Mask(SVBARE) Var(riscv_sv_subext)
+
+Mask(XCVALU) Var(riscv_xcv_subext)
+
+Mask(XCVBI) Var(riscv_xcv_subext)
+
+Mask(XCVELW) Var(riscv_xcv_subext)
+
+Mask(XCVMAC) Var(riscv_xcv_subext)
+
+Mask(XCVSIMD) Var(riscv_xcv_subext)
+
+Mask(XSFCEASE) Var(riscv_xsf_subext)
+
+Mask(XSFVCP) Var(riscv_xsf_subext)
+
+Mask(XSFVFNRCLIPXFQF) Var(riscv_xsf_subext)
+
+Mask(XSFVQMACCDOD) Var(riscv_xsf_subext)
+
+Mask(XSFVQMACCQOQ) Var(riscv_xsf_subext)
+
+Mask(XTHEADBA) Var(riscv_xthead_subext)
+
+Mask(XTHEADBB) Var(riscv_xthead_subext)
+
+Mask(XTHEADBS) Var(riscv_xthead_subext)
+
+Mask(XTHEADCMO) Var(riscv_xthead_subext)
+
+Mask(XTHEADCONDMOV) Var(riscv_xthead_subext)
+
+Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext)
+
+Mask(XTHEADFMV) Var(riscv_xthead_subext)
+
+Mask(XTHEADINT) Var(riscv_xthead_subext)
+
+Mask(XTHEADMAC) Var(riscv_xthead_subext)
+
+Mask(XTHEADMEMIDX) Var(riscv_xthead_subext)
+
+Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext)
+
+Mask(XTHEADSYNC) Var(riscv_xthead_subext)
+
+Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
+
+Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
+
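Each Mask(FOO) Var(riscv_*_subext) record above makes the options machinery allocate a bit in the named TargetVariable and emit MASK_FOO / TARGET_FOO macros, so backend code never has to know which subext word an extension landed in. A hedged sketch of the generated shape (the real options.h assigns the bit numbers and may differ in detail):

/* Roughly what "Mask(ZBA) Var(riscv_zb_subext)" turns into.  */
extern int riscv_zb_subext;

#define MASK_ZBA   (1 << 0)	/* bit number chosen by the generator */
#define TARGET_ZBA ((riscv_zb_subext & MASK_ZBA) != 0)

static bool
zba_enabled_p (void)
{
  /* Callers simply test TARGET_ZBA; the grouping into riscv_zb_subext
     is an implementation detail of this file.  */
  return TARGET_ZBA;
}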
diff --git a/gcc/config/riscv/riscv-ext.opt.urls b/gcc/config/riscv/riscv-ext.opt.urls
new file mode 100644
index 0000000..c4f4710
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.opt.urls
@@ -0,0 +1,2 @@
+; Autogenerated by regenerate-opt-urls.py from gcc/config/riscv/riscv-ext.opt and generated HTML
+
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 26fe228..e1a820b 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -58,7 +58,8 @@ enum riscv_microarchitecture_type {
sifive_p400,
sifive_p600,
xiangshan,
- generic_ooo
+ generic_ooo,
+ mips_p8700,
};
extern enum riscv_microarchitecture_type riscv_microarchitecture;
@@ -136,16 +137,16 @@ enum rvv_vector_bits_enum {
/* Bit of riscv_zvl_flags will set continually, N-1 bit will set if N-bit is
set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use
popcount to calculate the minimal VLEN. */
-#define TARGET_MIN_VLEN \
- ((riscv_zvl_flags == 0) \
- ? 0 \
- : 32 << (__builtin_popcount (riscv_zvl_flags) - 1))
+#define TARGET_MIN_VLEN \
+ ((riscv_zvl_subext == 0) \
+ ? 0 \
+ : 32 << (__builtin_popcount (riscv_zvl_subext) - 1))
/* Same as TARGET_MIN_VLEN, but take an OPTS as gcc_options. */
#define TARGET_MIN_VLEN_OPTS(opts) \
- ((opts->x_riscv_zvl_flags == 0) \
+ ((opts->x_riscv_zvl_subext == 0) \
? 0 \
- : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
+ : 32 << (__builtin_popcount (opts->x_riscv_zvl_subext) - 1))
/* The maximum LMUL according to user configuration. */
#define TARGET_MAX_LMUL \
@@ -162,4 +163,15 @@ enum riscv_tls_type {
#define TARGET_VECTOR_AUTOVEC_SEGMENT \
(TARGET_VECTOR && riscv_mautovec_segment)
+#define GPR2VR_COST_UNPROVIDED -1
+#define FPR2VR_COST_UNPROVIDED -1
+
+/* Extra extension flags, used to carry extra info for a RISC-V extension.  */
+enum
+{
+ EXT_FLAG_MACRO = 1 << 0,
+};
+
+#define BITMASK_NOT_YET_ALLOCATED -1
+
#endif /* ! GCC_RISCV_OPTS_H */
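The TARGET_MIN_VLEN rename above keeps the existing encoding trick: the Zvl bits are set cumulatively (enabling zvl128b also sets the zvl64b and zvl32b bits), so the popcount of riscv_zvl_subext counts how many doublings above 32 bits are guaranteed. A small worked example under that assumption, with illustrative bit values:

#include <stdio.h>

int
main (void)
{
  /* Pretend bit assignments; the real MASK_ZVL* values come from
     riscv-ext.opt above.  */
  const unsigned mask_zvl32b  = 1u << 0;
  const unsigned mask_zvl64b  = 1u << 1;
  const unsigned mask_zvl128b = 1u << 2;

  /* zvl128b implies the two smaller ones, so three bits are set.  */
  unsigned riscv_zvl_subext = mask_zvl32b | mask_zvl64b | mask_zvl128b;

  unsigned min_vlen = riscv_zvl_subext == 0
		      ? 0
		      : 32u << (__builtin_popcount (riscv_zvl_subext) - 1);

  printf ("TARGET_MIN_VLEN = %u\n", min_vlen);	/* 32 << 2 = 128 */
  return 0;
}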
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 7e6a2a0..bc803c4 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -21,3 +21,5 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
+
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2bedd87..a033120 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -140,6 +140,8 @@ extern void riscv_expand_sssub (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
+extern bool synthesize_ior_xor (rtx_code, rtx [3]);
+extern bool synthesize_and (rtx [3]);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -201,6 +203,8 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
+rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
+
/* Routines implemented in riscv-string.c. */
extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
@@ -664,6 +668,8 @@ void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
+void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
+void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -834,6 +840,8 @@ struct riscv_tune_info {
const struct riscv_tune_info *
riscv_parse_tune (const char *, bool);
const cpu_vector_cost *get_vector_costs ();
+int get_gr2vr_cost ();
+int get_fr2vr_cost ();
enum
{
diff --git a/gcc/config/riscv/riscv-shorten-memrefs.cc b/gcc/config/riscv/riscv-shorten-memrefs.cc
index 60f330e..2e3d9f6 100644
--- a/gcc/config/riscv/riscv-shorten-memrefs.cc
+++ b/gcc/config/riscv/riscv-shorten-memrefs.cc
@@ -189,8 +189,7 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
}
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
}
}
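The shorten-memrefs hunk reflects what appears to be an RTL sequence API cleanup: end_sequence now hands back the insns it collected, so the separate get_insns call is gone. A minimal sketch of the idiom as used here (GCC-internal fragment, not standalone code):

  start_sequence ();
  /* ... emit replacement insns ... */
  rtx_insn *seq = end_sequence ();  /* formerly: get_insns (); end_sequence ();  */
  emit_insn_before (seq, insn);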
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index 559e708..a35537d 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -82,6 +82,8 @@ private:
const char *parse_single_multiletter_ext (const char *, const char *,
const char *, bool);
+ std::string parse_profiles (const char*);
+
void handle_implied_ext (const char *);
bool check_implied_ext ();
void handle_combine_ext ();
@@ -107,9 +109,6 @@ public:
static riscv_subset_list *parse (const char *, location_t);
const char *parse_single_ext (const char *, bool exact_single_p = true);
- const riscv_subset_t *begin () const {return m_head;};
- const riscv_subset_t *end () const {return NULL;};
-
int match_score (riscv_subset_list *) const;
void set_loc (location_t);
@@ -117,6 +116,65 @@ public:
void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
void finalize ();
+
+ class iterator
+ {
+ public:
+ explicit iterator(riscv_subset_t *node) : m_node(node) {}
+
+ riscv_subset_t &operator*() const { return *m_node; }
+ riscv_subset_t *operator->() const { return m_node; }
+
+ iterator &operator++()
+ {
+ if (m_node)
+ m_node = m_node->next;
+ return *this;
+ }
+
+ bool operator!=(const iterator &other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ bool operator==(const iterator &other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ private:
+ riscv_subset_t *m_node;
+ };
+
+ iterator begin() { return iterator(m_head); }
+ iterator end() { return iterator(nullptr); }
+
+ class const_iterator
+ {
+ public:
+ explicit const_iterator(const riscv_subset_t *node) : m_node(node) {}
+
+ const riscv_subset_t &operator*() const { return *m_node; }
+ const riscv_subset_t *operator->() const { return m_node; }
+
+ const_iterator &operator++()
+ {
+ if (m_node)
+ m_node = m_node->next;
+ return *this;
+ }
+
+ bool operator!=(const const_iterator &other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ private:
+ const riscv_subset_t *m_node;
+ };
+
+ const_iterator begin() const { return const_iterator(m_head); }
+ const_iterator end() const { return const_iterator(nullptr); }
};
extern const riscv_subset_list *riscv_cmdline_subset_list (void);
@@ -127,6 +185,5 @@ extern bool riscv_minimal_hwprobe_feature_bits (const char *,
location_t);
extern bool
riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *);
-extern int riscv_x_target_flags_isa_mask (void);
#endif /* ! GCC_RISCV_SUBSET_H */
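With begin/end and the nested iterator types, a riscv_subset_list can now be walked with a range-based for loop instead of chasing the ->next pointers by hand. A usage sketch; the field names (name, major_version, minor_version) follow the riscv_subset_t layout assumed here:

/* Dump every parsed extension as "name major.minor".  */
static void
dump_subset_list (const riscv_subset_list &subsets)
{
  for (const riscv_subset_t &subset : subsets)
    fprintf (stderr, "%s %d.%d\n", subset.name.c_str (),
	     subset.major_version, subset.minor_version);
}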
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 1d96865..8ad3025 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -257,11 +257,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
{
std::string local_arch = m_subset_list->to_string (true);
const char* local_arch_str = local_arch.c_str ();
- struct cl_target_option *default_opts
- = TREE_TARGET_OPTION (target_option_default_node);
- if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string));
- opts->x_riscv_arch_string = xstrdup (local_arch_str);
+ opts->x_riscv_arch_string = ggc_strdup (local_arch_str);
riscv_set_arch_by_subset_list (m_subset_list, opts);
}
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index aae2d27..ac690df 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1171,461 +1171,511 @@ expand_vector_init_trailing_same_elem (rtx target,
}
static void
-expand_const_vector (rtx target, rtx src)
+expand_const_vec_duplicate (rtx target, rtx src, rtx elt)
{
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
- rtx elt;
- if (const_vec_duplicate_p (src, &elt))
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (rtx_equal_p (elt, const0_rtx)
+ || rtx_equal_p (elt, const1_rtx));
+
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
+ }
+ else if (valid_vec_immediate_p (src))
{
- if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- {
- gcc_assert (rtx_equal_p (elt, const0_rtx)
- || rtx_equal_p (elt, const1_rtx));
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
- }
/* Element in range -16 ~ 15 integer or 0.0 floating-point,
we use vmv.v.i instruction. */
- else if (valid_vec_immediate_p (src))
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ }
+ else
+ {
+ /* Emit vec_duplicate<mode> split pattern before RA so that
+ we could have a better optimization opportunity in LICM
+ which will hoist vmv.v.x outside the loop and in fwprop && combine
+ which will transform 'vv' into 'vx' instruction.
+
+ The reason we don't emit vec_duplicate<mode> split pattern during
+ RA since the split stage after RA is a too late stage to generate
+ RVV instruction which need an additional register (We can't
+ allocate a new register after RA) for VL operand of vsetvl
+ instruction (vsetvl a5, zero). */
+ if (lra_in_progress)
{
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ rtx ops[] = {result, elt};
+ emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
}
else
{
- /* Emit vec_duplicate<mode> split pattern before RA so that
- we could have a better optimization opportunity in LICM
- which will hoist vmv.v.x outside the loop and in fwprop && combine
- which will transform 'vv' into 'vx' instruction.
-
- The reason we don't emit vec_duplicate<mode> split pattern during
- RA since the split stage after RA is a too late stage to generate
- RVV instruction which need an additional register (We can't
- allocate a new register after RA) for VL operand of vsetvl
- instruction (vsetvl a5, zero). */
- if (lra_in_progress)
- {
- rtx ops[] = {result, elt};
- emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
- }
- else
- {
- struct expand_operand ops[2];
- enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
- gcc_assert (icode != CODE_FOR_nothing);
- create_output_operand (&ops[0], result, mode);
- create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
- expand_insn (icode, 2, ops);
- result = ops[0].value;
- }
+ struct expand_operand ops[2];
+ enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ create_output_operand (&ops[0], result, mode);
+ create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
+ expand_insn (icode, 2, ops);
+ result = ops[0].value;
}
-
- if (result != target)
- emit_move_insn (target, result);
- return;
}
- /* Support scalable const series vector. */
- rtx base, step;
- if (const_vec_series_p (src, &base, &step))
- {
- expand_vec_series (result, base, step);
+ if (result != target)
+ emit_move_insn (target, result);
+}
- if (result != target)
- emit_move_insn (target, result);
- return;
+static void
+expand_const_vec_series (rtx target, rtx base, rtx step)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ expand_vec_series (result, base, step);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+
+/* We handle the case that we can find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 8
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+   In this case, we can combine NPATTERNS elements into a larger
+   element.  Use element width = 64 and broadcast a vector with
+   all elements equal to 0x0706050403020100.  */
+
+static void
+expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx ele = builder->get_merged_repeating_sequence ();
+ rtx dup;
+
+ if (lra_in_progress)
+ {
+ dup = gen_reg_rtx (builder->new_mode ());
+ rtx ops[] = {dup, ele};
+ emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()),
+ UNARY_OP, ops);
}
+ else
+ dup = expand_vector_broadcast (builder->new_mode (), ele);
- /* Handle variable-length vector. */
- unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
- unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
- rvv_builder builder (mode, npatterns, nelts_per_pattern);
- for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ emit_move_insn (result, gen_lowpart (mode, dup));
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* We handle the case that we can't find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 16
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+ Since NPATTERNS * element width = 128, we can't find a container
+ to hold it.
+
+   In this case, we use NPATTERNS merge operations to generate such
+   a vector.  */
+
+static void
+expand_const_vector_duplicate_default (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ unsigned int nbits = builder->npatterns () - 1;
+
+ /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->int_mode ());
+ rtx op[] = {vid};
+ emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op);
+
+ /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
+ rtx vid_repeat = gen_reg_rtx (builder->int_mode ());
+ rtx and_ops[] = {vid_repeat, vid,
+ gen_int_mode (nbits, builder->inner_int_mode ())};
+ emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP,
+ and_ops);
+
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx dup_ops[] = {tmp1, builder->elt (0)};
+ emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP,
+ dup_ops);
+
+ for (unsigned int i = 1; i < builder->npatterns (); i++)
{
- for (unsigned int j = 0; j < npatterns; j++)
- builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ /* Generate mask according to i. */
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ rtx const_vec = gen_const_vector_dup (builder->int_mode (), i);
+ expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
+
+ /* Merge scalar to each i. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask};
+ insn_code icode = code_for_pred_merge_scalar (builder->mode ());
+ emit_vlmax_insn (icode, MERGE_OP, merge_ops);
+ tmp1 = tmp2;
}
- builder.finalize ();
- if (CONST_VECTOR_DUPLICATE_P (src))
+ emit_move_insn (result, tmp1);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
+ E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
+ NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
+   The elements within NPATTERNS are not necessarily regular.  */
+static void
+expand_const_vector_duplicate (rtx target, rvv_builder *builder)
+{
+ if (builder->can_duplicate_repeating_sequence_p ())
+ return expand_const_vector_duplicate_repeating (target, builder);
+ else
+ return expand_const_vector_duplicate_default (target, builder);
+}
+
+static void
+expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ /* Describe the case by choosing NPATTERNS = 4 as an example. */
+ insn_code icode;
+
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder->mode ());
+ emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
+
+ if (builder->npatterns_all_equal_p ())
{
- /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
- E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
- NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
- The elements within NPATTERNS are not necessary regular. */
- if (builder.can_duplicate_repeating_sequence_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
+ E.g. { 0, 0, 8, 8, 16, 16, ... } */
+
+ /* We want to create a pattern where value[idx] = floor (idx /
+ NPATTERNS). As NPATTERNS is always a power of two we can
+ rewrite this as = idx & -NPATTERNS. */
+ /* Step 2: VID AND -NPATTERNS:
+ { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */
+ rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
+ icode = code_for_pred_scalar (AND, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ /* log2 (npatterns) to get the shift amount to convert
+ Eg. { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ());
+ rtx shift = gen_int_mode (shift_amt, builder->inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size =
+ INTVAL (builder->elt (builder->npatterns ()))
+ - INTVAL (builder->elt (0));
+ rtx tmp3 = gen_reg_rtx (builder->mode ());
+ if (pow2p_hwi (step_size))
{
- /* We handle the case that we can find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 8
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- In this case, we can combine NPATTERNS element into a larger
- element. Use element width = 64 and broadcast a vector with
- all element equal to 0x0706050403020100. */
- rtx ele = builder.get_merged_repeating_sequence ();
- rtx dup;
- if (lra_in_progress)
- {
- dup = gen_reg_rtx (builder.new_mode ());
- rtx ops[] = {dup, ele};
- emit_vlmax_insn (code_for_pred_broadcast
- (builder.new_mode ()), UNARY_OP, ops);
- }
- else
- dup = expand_vector_broadcast (builder.new_mode (), ele);
- emit_move_insn (result, gen_lowpart (mode, dup));
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
}
else
{
- /* We handle the case that we can't find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 16
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- Since NPATTERNS * element width = 128, we can't find a container
- to hold it.
-
- In this case, we use NPATTERNS merge operations to generate such
- vector. */
- unsigned int nbits = npatterns - 1;
-
- /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.int_mode ());
- rtx op[] = {vid};
- emit_vlmax_insn (code_for_pred_series (builder.int_mode ()),
- NULLARY_OP, op);
-
- /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
- rtx vid_repeat = gen_reg_rtx (builder.int_mode ());
- rtx and_ops[] = {vid_repeat, vid,
- gen_int_mode (nbits, builder.inner_int_mode ())};
- emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()),
- BINARY_OP, and_ops);
-
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx dup_ops[] = {tmp1, builder.elt (0)};
- emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP,
- dup_ops);
- for (unsigned int i = 1; i < builder.npatterns (); i++)
- {
- /* Generate mask according to i. */
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- rtx const_vec = gen_const_vector_dup (builder.int_mode (), i);
- expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
-
- /* Merge scalar to each i. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask};
- insn_code icode = code_for_pred_merge_scalar (builder.mode ());
- emit_vlmax_insn (icode, MERGE_OP, merge_ops);
- tmp1 = tmp2;
- }
- emit_move_insn (result, tmp1);
+ rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder->mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
+ /* Step 5: Add starting value to all elements. */
+ HOST_WIDE_INT init_val = INTVAL (builder->elt (0));
+ if (init_val == 0)
+ emit_move_insn (result, tmp3);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder->mode (), init_val);
+ rtx add_ops[] = {result, tmp3, dup};
+ icode = code_for_pred (PLUS, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, add_ops);
}
}
- else if (CONST_VECTOR_STEPPED_P (src))
+ else
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (builder.single_step_npatterns_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
+ if (builder->npatterns_vid_diff_repeated_p ())
{
- /* Describe the case by choosing NPATTERNS = 4 as an example. */
- insn_code icode;
-
- /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.mode ());
- rtx vid_ops[] = {vid};
- icode = code_for_pred_series (builder.mode ());
- emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
-
- if (builder.npatterns_all_equal_p ())
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
{
- /* Generate the variable-length vector following this rule:
- { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
- E.g. { 0, 0, 8, 8, 16, 16, ... } */
-
- /* We want to create a pattern where value[idx] = floor (idx /
- NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = idx & -NPATTERNS. */
- /* Step 2: VID AND -NPATTERNS:
- { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
- */
- rtx imm
- = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp1, vid, imm};
- icode = code_for_pred_scalar (AND, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, and_ops);
-
- /* Step 3: Convert to step size 1. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- /* log2 (npatterns) to get the shift amount to convert
- Eg. { 0, 0, 0, 0, 4, 4, ... }
- into { 0, 0, 0, 0, 1, 1, ... }. */
- HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
- rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
- rtx shift_ops[] = {tmp2, tmp1, shift};
- icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, shift_ops);
-
- /* Step 4: Multiply to step size n. */
- HOST_WIDE_INT step_size =
- INTVAL (builder.elt (builder.npatterns ()))
- - INTVAL (builder.elt (0));
- rtx tmp3 = gen_reg_rtx (builder.mode ());
- if (pow2p_hwi (step_size))
- {
- /* Power of 2 can be handled with a left shift. */
- HOST_WIDE_INT shift = exact_log2 (step_size);
- rtx shift_amount = gen_int_mode (shift, Pmode);
- insn_code icode = code_for_pred_scalar (ASHIFT, mode);
- rtx ops[] = {tmp3, tmp2, shift_amount};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
- else
- {
- rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
- insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
- rtx ops[] = {tmp3, tmp2, mult_amt};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-
- /* Step 5: Add starting value to all elements. */
- HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
- if (init_val == 0)
- emit_move_insn (result, tmp3);
- else
- {
- rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {result, tmp3, dup};
- icode = code_for_pred (PLUS, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, add_ops);
- }
+ poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
}
- else
- {
- /* Generate the variable-length vector following this rule:
- { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
- if (builder.npatterns_vid_diff_repeated_p ())
- {
- /* Case 1: For example as below:
- {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
- We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
- repeated as below after minus vid.
- {3, 1, -1, -3, 3, 1, -1, -3...}
- Then we can simplify the diff code gen to at most
- npatterns(). */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate diff = TARGET - VID. */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- {
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
- }
-
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {result, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- else
- {
- /* Case 2: For example as below:
- { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
- */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate { a, b, a, b, ... } */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- v.quick_push (builder.elt (i));
- rtx new_base = v.build ();
-
-		  /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
- rtx shift_count
- = gen_int_mode (exact_log2 (builder.npatterns ()),
- builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx shift_ops[] = {tmp1, vid, shift_count};
- emit_vlmax_insn (code_for_pred_scalar
- (LSHIFTRT, builder.mode ()), BINARY_OP,
- shift_ops);
-
-		  /* Step 3: Generate tmp2 = tmp1 * step.  */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx step
- = simplify_binary_operation (MINUS, builder.inner_mode (),
- builder.elt (v.npatterns()),
- builder.elt (0));
- expand_vec_series (tmp2, const0_rtx, step, tmp1);
-
-		  /* Step 4: Generate result = tmp2 + new_base.  */
- rtx add_ops[] = {result, tmp2, new_base};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- }
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {result, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
}
- else if (builder.interleaved_stepped_npatterns_p ())
+ else
{
- rtx base1 = builder.elt (0);
- rtx base2 = builder.elt (1);
- poly_int64 step1
- = rtx_to_poly_int64 (builder.elt (builder.npatterns ()))
- - rtx_to_poly_int64 (base1);
- poly_int64 step2
- = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1))
- - rtx_to_poly_int64 (base2);
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder->elt (i));
+ rtx new_base = v.build ();
+
+      /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
+ rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()),
+ builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()),
+ BINARY_OP, shift_ops);
+
+      /* Step 3: Generate tmp2 = tmp1 * step.  */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder->inner_mode (),
+ builder->elt (v.npatterns()),
+ builder->elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp1);
+
+      /* Step 4: Generate result = tmp2 + new_base.  */
+ rtx add_ops[] = {result, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
+ }
+ }
- /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
- integer vector mode to generate such vector efficiently.
+ if (result != target)
+ emit_move_insn (target, result);
+}
- E.g. EEW = 16, { 2, 0, 4, 0, ... }
+static void
+expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src,
+ rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx base1 = builder->elt (0);
+ rtx base2 = builder->elt (1);
- can be interpreted into:
+ poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ()))
+ - rtx_to_poly_int64 (base1);
+ poly_int64 step2 =
+ rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1))
+ - rtx_to_poly_int64 (base2);
- EEW = 32, { 2, 4, ... }.
+ /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
+ integer vector mode to generate such vector efficiently.
- Both the series1 and series2 may overflow before taking the IOR
- to generate the final result. However, only series1 matters
- because the series2 will shift before IOR, thus the overflow
- bits will never pollute the final result.
+ E.g. EEW = 16, { 2, 0, 4, 0, ... }
- For now we forbid the negative steps and overflow, and they
- will fall back to the default merge way to generate the
- const_vector. */
+ can be interpreted into:
- unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
- scalar_int_mode new_smode;
- machine_mode new_mode;
- poly_uint64 new_nunits
- = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+ EEW = 32, { 2, 4, ... }.
- poly_int64 base1_poly = rtx_to_poly_int64 (base1);
- bool overflow_smode_p = false;
+ Both the series1 and series2 may overflow before taking the IOR
+ to generate the final result. However, only series1 matters
+ because the series2 will shift before IOR, thus the overflow
+ bits will never pollute the final result.
- if (!step1.is_constant ())
- overflow_smode_p = true;
- else
- {
- int elem_count = XVECLEN (src, 0);
- uint64_t step1_val = step1.to_constant ();
- uint64_t base1_val = base1_poly.to_constant ();
- uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+ For now we forbid the negative steps and overflow, and they
+ will fall back to the default merge way to generate the
+ const_vector. */
- if ((elem_val >> builder.inner_bits_size ()) != 0)
- overflow_smode_p = true;
- }
+ unsigned int new_smode_bitsize = builder->inner_bits_size () * 2;
+ scalar_int_mode new_smode;
+ machine_mode new_mode;
+ poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2);
+
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
+
+ if (!step1.is_constant ())
+ overflow_smode_p = true;
+ else
+ {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = step1.to_constant ();
+ uint64_t base1_val = base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+ if ((elem_val >> builder->inner_bits_size ()) != 0)
+ overflow_smode_p = true;
+ }
- if (known_ge (step1, 0) && known_ge (step2, 0)
- && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
- && !overflow_smode_p)
+ if (known_ge (step1, 0) && known_ge (step2, 0)
+ && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
+ {
+ rtx tmp1 = gen_reg_rtx (new_mode);
+ base1 = gen_int_mode (base1_poly, new_smode);
+ expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
+
+ if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
+ /* { 1, 0, 2, 0, ... }. */
+ emit_move_insn (result, gen_lowpart (mode, tmp1));
+ else if (known_eq (step2, 0))
+ {
+ /* { 1, 1, 2, 1, ... }. */
+ rtx scalar = expand_simple_binop (
+ Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
+ gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false,
+ OPTAB_DIRECT);
+ scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP,
+ ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp2));
+ }
+ else
+ {
+ /* { 1, 3, 2, 6, ... }. */
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
+ expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode));
+ rtx shifted_tmp2;
+ rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode);
+ if (lra_in_progress)
{
- rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (base1_poly, new_smode);
- expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
-
- if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
- /* { 1, 0, 2, 0, ... }. */
- emit_move_insn (result, gen_lowpart (mode, tmp1));
- else if (known_eq (step2, 0))
- {
- /* { 1, 1, 2, 1, ... }. */
- rtx scalar = expand_simple_binop (
- Xmode, ASHIFT,
- gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
- gen_int_mode (builder.inner_bits_size (), Xmode),
- NULL_RTX, false, OPTAB_DIRECT);
- scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
- rtx tmp2 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
- BINARY_OP, ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp2));
- }
- else
- {
- /* { 1, 3, 2, 6, ... }. */
- rtx tmp2 = gen_reg_rtx (new_mode);
- base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, new_smode));
- rtx shifted_tmp2;
- rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
- if (lra_in_progress)
- {
- shifted_tmp2 = gen_reg_rtx (new_mode);
- rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, new_mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
- shift, NULL_RTX, false,
- OPTAB_DIRECT);
- rtx tmp3 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
- emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
- ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp3));
- }
+ shifted_tmp2 = gen_reg_rtx (new_mode);
+ rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode),
+ BINARY_OP, shift_ops);
}
else
- {
- rtx vid = gen_reg_rtx (mode);
- expand_vec_series (vid, const0_rtx, const1_rtx);
- /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
- rtx shifted_vid;
- if (lra_in_progress)
- {
- shifted_vid = gen_reg_rtx (mode);
- rtx shift = gen_int_mode (1, Xmode);
- rtx shift_ops[] = {shifted_vid, vid, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
- const1_rtx, NULL_RTX,
- false, OPTAB_DIRECT);
- rtx tmp1 = gen_reg_rtx (mode);
- rtx tmp2 = gen_reg_rtx (mode);
- expand_vec_series (tmp1, base1,
- gen_int_mode (step1, builder.inner_mode ()),
- shifted_vid);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, builder.inner_mode ()),
- shifted_vid);
-
- /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
- rtx and_vid = gen_reg_rtx (mode);
- rtx and_ops[] = {and_vid, vid, const1_rtx};
- emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP,
- and_ops);
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
-
- rtx ops[] = {result, tmp1, tmp2, mask};
- emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
- }
+ shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp3 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
+ emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp3));
}
- else
- /* TODO: We will enable more variable-length vector in the future. */
- gcc_unreachable ();
}
else
- gcc_unreachable ();
+ {
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+ /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
+ rtx shifted_vid;
+ if (lra_in_progress)
+ {
+ shifted_vid = gen_reg_rtx (mode);
+ rtx shift = gen_int_mode (1, Xmode);
+ rtx shift_ops[] = {shifted_vid, vid, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP,
+ shift_ops);
+ }
+ else
+ shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx tmp2 = gen_reg_rtx (mode);
+ expand_vec_series (tmp1, base1,
+ gen_int_mode (step1, builder->inner_mode ()),
+ shifted_vid);
+ expand_vec_series (tmp2, base2,
+ gen_int_mode (step2, builder->inner_mode ()),
+ shifted_vid);
+
+ /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
+ rtx and_vid = gen_reg_rtx (mode);
+ rtx and_ops[] = {and_vid, vid, const1_rtx};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
+
+ rtx ops[] = {result, tmp1, tmp2, mask};
+ emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
+ }
if (result != target)
emit_move_insn (target, result);
}
+static void
+expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
+{
+ gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT);
+
+ if (builder->single_step_npatterns_p ())
+ return expand_const_vector_single_step_npatterns (target, builder);
+ else if (builder->interleaved_stepped_npatterns_p ())
+ return expand_const_vector_interleaved_stepped_npatterns (target, src,
+ builder);
+
+ /* TODO: We will enable more variable-length vector in the future. */
+ gcc_unreachable ();
+}
+
+static void
+expand_const_vector (rtx target, rtx src)
+{
+ rtx elt;
+ if (const_vec_duplicate_p (src, &elt))
+ return expand_const_vec_duplicate (target, src, elt);
+
+ /* Support scalable const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (src, &base, &step))
+    return expand_const_vec_series (target, base, step);
+
+ /* Handle variable-length vector. */
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
+ rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern);
+
+ for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ {
+ for (unsigned int j = 0; j < npatterns; j++)
+ builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ }
+
+ builder.finalize ();
+
+ if (CONST_VECTOR_DUPLICATE_P (src))
+ return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_STEPPED_P (src))
+ return expand_const_vector_stepped (target, src, &builder);
+
+ gcc_unreachable ();
+}
+
/* Get the frm mode with given CONST_INT rtx, the default mode is
FRM_DYN. */
enum floating_point_rounding_mode
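The expand_const_vector_duplicate_repeating path above leans on get_merged_repeating_sequence: one period of the repeating constant is packed into a single wider element and that element is broadcast. A host-side illustration of the packing arithmetic for the NPATTERNS = 8, 8-bit-element case named in the comment:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  /* One period of { 0, 1, 2, ..., 7, 0, 1, ... }.  */
  uint8_t pattern[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

  /* Pack the eight byte elements into one 64-bit element, element 0 in
     the least significant byte (little-endian lane order).  */
  uint64_t merged = 0;
  for (int i = 0; i < 8; i++)
    merged |= (uint64_t) pattern[i] << (8 * i);

  /* Broadcasting MERGED with EEW = 64 reproduces the original vector.  */
  printf ("0x%016llx\n", (unsigned long long) merged);	/* 0x0706050403020100 */
  return 0;
}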
@@ -2771,6 +2821,28 @@ autovectorize_vector_modes (vector_modes *modes, bool)
i++;
size = base_size / (1U << i);
}
+
+  /* If the user specified the exact mode to use, check whether it is
+     available and remove all other modes before returning.  */
+ if (riscv_autovec_mode)
+ {
+ auto_vector_modes ms;
+ ms.safe_splice (*modes);
+ modes->truncate (0);
+
+ for (machine_mode mode : ms)
+ {
+ if (!strcmp (GET_MODE_NAME (mode), riscv_autovec_mode))
+ {
+ modes->safe_push (mode);
+ return 0;
+ }
+ }
+
+ /* Nothing found, fall back to regular handling. */
+ modes->safe_splice (ms);
+ }
+
/* Enable LOOP_VINFO comparison in COST model. */
return VECT_COMPARE_COSTS;
}
@@ -5448,6 +5520,75 @@ expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
}
+/* Expand the binary vx combine of the form v2 = vop (vec_dup (x), v1),
+   i.e. the first operand comes from the vec_duplicate and the second
+   operand is the vector register.  */
+
+void
+expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2,
+ rtx_code code, machine_mode mode)
+{
+ enum insn_code icode;
+
+ switch (code)
+ {
+ case PLUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case MULT:
+ case SMAX:
+ case UMAX:
+ case SMIN:
+ case UMIN:
+ icode = code_for_pred_scalar (code, mode);
+ break;
+ case MINUS:
+ icode = code_for_pred_sub_reverse_scalar (mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx ops[] = {op_0, op_1, op_2};
+ emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
+/* Expand the binary vx combine of the form v2 = vop (v1, vec_dup (x)),
+   i.e. the second operand comes from the vec_duplicate and the first
+   operand is the vector register.  */
+
+void
+expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2,
+ rtx_code code, machine_mode mode)
+{
+ enum insn_code icode;
+
+ switch (code)
+ {
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case MULT:
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ case SMAX:
+ case UMAX:
+ case SMIN:
+ case UMIN:
+ icode = code_for_pred_scalar (code, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx ops[] = {op_0, op_1, op_2};
+ emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
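expand_vx_binary_vec_dup_vec and expand_vx_binary_vec_vec_dup give later expander/combine code a single entry point for folding a broadcast operand into the scalar (.vx) form of an instruction, instead of materialising the splat and using the vector-vector form. A hypothetical caller sketch; the exact operand order the helpers expect follows the pred_scalar patterns in riscv-v.cc and is assumed here, as are the surrounding includes of that file:

/* DEST = VEC_DUPLICATE (SCALAR) + V1, emitted as a vadd.vx-style insn.  */
static void
expand_add_of_broadcast (rtx dest, rtx scalar, rtx v1, machine_mode mode)
{
  expand_vx_binary_vec_dup_vec (dest, scalar, v1, PLUS, mode);
}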
diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc
new file mode 100644
index 0000000..087f26a
--- /dev/null
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -0,0 +1,318 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM
+#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "target.h"
+#include "tree-pass.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "cfgcleanup.h"
+#include "insn-attr.h"
+#include "tm-constrs.h"
+#include "insn-opinit.h"
+#include "cfgrtl.h"
+
+/* So the basic idea of this pass is to identify loads of permutation
+ constants from the constant pool which could instead be trivially
+ derived from some earlier vector permutation constant. This will
+ replace a memory load from the constant pool with a vadd.vi
+ instruction.
+
+ Conceptually this is much like the related_values optimization in
+ CSE, reload_cse_move2add or using SLSR to optimize constant synthesis.
+ If we wanted to make this generic I would suggest putting it into CSE
+ and providing target hooks to determine if particular permutation
+ constants could be derived from earlier permutation constants. */
+
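A hedged, self-contained C++ sketch (not part of the new pass; names are hypothetical) of the arithmetic the pass relies on: two constant vectors that normalize to the same sequence differ only by a uniform per-element bias, and when that bias difference fits vadd.vi's 5-bit signed immediate the later constant-pool load can be replaced by a vadd.vi from the earlier destination register.

#include <cstdint>
#include <cstdio>
#include <vector>

/* Return true (and set ADJUSTMENT) if NEWC can be derived from OLDC with a
   single vadd.vi, i.e. every element differs by the same constant and that
   constant fits the immediate range [-16, 15].  */
static bool
derivable_by_vadd_vi (const std::vector<int64_t> &oldc,
                      const std::vector<int64_t> &newc,
                      int64_t &adjustment)
{
  if (oldc.size () != newc.size () || oldc.empty ())
    return false;
  adjustment = newc[0] - oldc[0];
  for (size_t i = 0; i < oldc.size (); i++)
    if (newc[i] - oldc[i] != adjustment)
      return false;
  return adjustment >= -16 && adjustment <= 15;
}

int
main ()
{
  int64_t adj;
  /* {4,5,6,7} = {2,3,4,5} + 2, so the second constant-pool load could
     become "vadd.vi vnew, vold, 2".  */
  bool ok = derivable_by_vadd_vi ({ 2, 3, 4, 5 }, { 4, 5, 6, 7 }, adj);
  printf ("%d %lld\n", ok, (long long) adj);   /* prints: 1 2 */
  return 0;
}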
+const pass_data pass_data_vect_permconst = {
+ RTL_PASS, /* type */
+ "vect_permconst", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+/* Entry in the hash table. We "normalize" the permutation constant
+ by adjusting all entries by the value in the first element. This
+ allows simple hashing to discover permutation constants that differ
+ by a single constant across all their elements and may be derived
+ from each other with a vadd.vi. */
+
+struct vector_permconst_entry
+{
+ /* The CONST_VECTOR in normalized form (first entry is zero). */
+ /* We could avoid copying the vector with a more customized hash
+ routine which took care of normalization. */
+ rtx normalized_vec;
+
+ /* The destination register holding the CONST_VECTOR. When the optimization
+ applies this will be used as a source operand in the vadd.vi. */
+ rtx dest;
+
+  /* The insn generating DEST.  The only reason we need this is that we
+     do not invalidate entries, which means we have to verify that DEST
+     is unchanged between INSN and the point where we want to use DEST
+     to derive a new permutation constant.  */
+ rtx_insn *insn;
+
+ /* The bias of this entry used for normalization. If this value is added
+ to each element in NORMALIZED_VEC we would have the original permutation
+ constant. */
+ HOST_WIDE_INT bias;
+};
+
+struct const_vector_hasher : nofree_ptr_hash <vector_permconst_entry>
+{
+ static inline hashval_t hash (const vector_permconst_entry *);
+ static inline bool equal (const vector_permconst_entry *,
+ const vector_permconst_entry *);
+};
+
+inline bool
+const_vector_hasher::equal (const vector_permconst_entry *vpe1,
+ const vector_permconst_entry *vpe2)
+{
+ /* Do the cheap tests first, namely that the mode and number of entries
+     match between the two entries.  */
+ if (GET_MODE (vpe1->normalized_vec) != GET_MODE (vpe2->normalized_vec))
+ return false;
+
+ if (CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ()
+ != CONST_VECTOR_NUNITS (vpe2->normalized_vec).to_constant ())
+ return false;
+
+ /* Check the value of each entry in the vector. We violate structure
+ sharing rules inside this pass, so while pointer equality would normally
+ be OK, it isn't here. */
+ for (int i = 0;
+ i < CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ();
+ i++)
+ if (!rtx_equal_p (CONST_VECTOR_ELT (vpe1->normalized_vec, i),
+ CONST_VECTOR_ELT (vpe2->normalized_vec, i)))
+ return false;
+
+ return true;
+}
+
+inline hashval_t
+const_vector_hasher::hash (const vector_permconst_entry *vpe)
+{
+ int do_not_record;
+ return hash_rtx (vpe->normalized_vec, Pmode, &do_not_record, NULL, false);
+}
+
+
+class vector_permconst : public rtl_opt_pass
+{
+public:
+ vector_permconst (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_vect_permconst, ctxt) {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) final override
+ {
+ return TARGET_VECTOR && optimize > 0;
+ }
+ virtual unsigned int execute (function *) final override;
+
+private:
+ void process_bb (basic_block);
+ hash_table<const_vector_hasher> *vector_permconst_table;
+}; // class pass_vector_permconst
+
+/* Try to optimize vector permutation constants in BB. */
+void
+vector_permconst::process_bb (basic_block bb)
+{
+ vector_permconst_table = new hash_table<const_vector_hasher> (11);
+
+ /* Walk the insns in BB searching for vector loads from the constant pool
+ which can be satisfied by adjusting an earlier load with trivial
+ arithmetic. */
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+
+ rtx dest = SET_DEST (set);
+ if (GET_MODE_CLASS (GET_MODE (dest)) != MODE_VECTOR_INT)
+ continue;
+
+ rtx src = SET_SRC (set);
+ if (!MEM_P (src))
+ continue;
+
+ /* A load from the constant pool should have a REG_EQUAL
+     note with the vector constant in the note.  */
+ rtx note = find_reg_equal_equiv_note (insn);
+ if (!note
+ || REG_NOTE_KIND (note) != REG_EQUAL
+ || GET_CODE (XEXP (note, 0)) != CONST_VECTOR)
+ continue;
+
+ if (!CONST_VECTOR_NUNITS (XEXP (note, 0)).is_constant ())
+ continue;
+
+ /* XXX Do we need to consider other forms of constants? */
+
+ /* We want to be selective about what gets past this point since
+ we make a copy of the vector and possibly enter it into the
+	 hash table.  So reject cases that are unlikely to be permutation
+	 constants, i.e. those with a negative or large bias.  We arbitrarily
+ use 16k as the largest vector size in bits we try to optimize.
+
+ It may seem like a bias outside the range of vadd.vi should
+ be rejected, but what really matters is the difference of
+ biases across the two permutation constants. */
+ rtx cvec = XEXP (note, 0);
+ HOST_WIDE_INT bias = INTVAL (CONST_VECTOR_ELT (cvec, 0));
+ if (bias < 0 || bias > 16384 / 8)
+ continue;
+
+ /* We need to verify that each element would be a valid value
+ in the inner mode after applying the bias. */
+ machine_mode inner = GET_MODE_INNER (GET_MODE (cvec));
+ HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant ();
+ int i;
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias;
+ if (val != sext_hwi (val, precision))
+ break;
+ }
+
+ /* If the loop terminated early, then we found a case where the
+ adjusted constant would not fit, so we can't record the constant
+	 for this case (it's unlikely to be useful anyway).  */
+ if (i != CONST_VECTOR_NUNITS (cvec).to_constant ())
+ continue;
+
+ /* At this point we have a load of a constant integer vector from the
+ constant pool. That constant integer vector is hopefully a
+ permutation constant. We need to make a copy of the vector and
+ normalize it to zero.
+
+ XXX This violates structure sharing conventions. */
+ rtvec_def *nvec = rtvec_alloc (CONST_VECTOR_NUNITS (cvec).to_constant ());
+
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
+
+ rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
+
+ /* Now that we have a normalized vector, look it up in the hash table,
+ inserting it if it wasn't already in the table. */
+ struct vector_permconst_entry tmp;
+ tmp.normalized_vec = copy;
+ struct vector_permconst_entry **slot
+ = vector_permconst_table->find_slot (&tmp, INSERT);
+ if (*slot == NULL)
+ {
+ /* This constant was not in the table, so initialize the hash table
+ entry. */
+ *slot = XNEW (vector_permconst_entry);
+ (*slot)->normalized_vec = copy;
+ (*slot)->dest = dest;
+ (*slot)->bias = bias;
+ (*slot)->insn = insn;
+ }
+ else
+ {
+ /* A hit in the hash table. We may be able to optimize this case.
+
+ If the difference in biases fits in the immediate range for
+ vadd.vi, then we may optimize. */
+ HOST_WIDE_INT adjustment = bias - (*slot)->bias;
+ if (IN_RANGE (adjustment, -16, 15))
+ {
+ /* We also need to make sure the destination register was not
+ modified. I've chosen to test for that at optimization time
+ rather than invalidate entries in the table. This could be
+ changed to use REG_TICK like schemes or true invalidation if
+ this proves too compile-time costly. */
+ if (!reg_set_between_p ((*slot)->dest, (*slot)->insn, insn))
+ {
+ /* Instead of loading from the constant pool, adjust the
+ output of the earlier insn into our destination. */
+ rtx x = gen_const_vec_duplicate (GET_MODE (copy),
+ GEN_INT (adjustment));
+ rtx plus = gen_rtx_PLUS (GET_MODE (copy), (*slot)->dest, x);
+ rtx set = gen_rtx_SET (dest, plus);
+ rtx_insn *new_insn = emit_insn_after (set, insn);
+ /* XXX Should we copy over the REG_EQUAL note first? */
+ delete_insn (insn);
+ insn = new_insn;
+ }
+ }
+
+ /* We always keep the hash table entry pointing to the most recent
+ INSN that could generate the normalized entry. We can adjust
+ in the future if data says it's useful to do so. This just
+ keeps things simple for now.
+
+ For example, we might want to keep multiple entries if they
+	 have different biases.  */
+ (*slot)->dest = dest;
+ (*slot)->bias = bias;
+ (*slot)->insn = insn;
+ }
+ }
+
+ /* We construct and tear down the table for each block. This may
+ be overly expensive. */
+ vector_permconst_table->empty ();
+}
+
+/* Main entry point for this pass. */
+unsigned int
+vector_permconst::execute (function *fn)
+{
+ /* Handle each block independently. While this should work nicely on EBBs,
+ let's wait for real world cases where it matters before adding that
+ complexity. */
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, fn)
+ process_bb (bb);
+
+ return 0;
+}
+
+rtl_opt_pass *
+make_pass_vector_permconst (gcc::context *ctxt)
+{
+ return new vector_permconst (ctxt);
+}
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index fc21b20..9832eb9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -908,6 +908,8 @@ struct vset_def : public build_base
{
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2));
+ if (maybe_eq (inner_size, 0))
+ return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
}
@@ -920,6 +922,8 @@ struct vget_def : public misc_def
{
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ());
+ if (maybe_eq (inner_size, 0))
+ return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
}
@@ -1343,6 +1347,52 @@ struct sf_vfnrclip_def : public build_base
}
};
+/* sf_vcix_se_def class. */
+struct sf_vcix_se_def : public build_base
+{
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ /* Return nullptr if it is overloaded. */
+ if (overloaded_p)
+ return nullptr;
+
+ b.append_base_name (instance.base_name);
+
+    /* vop --> vop_<op>_se_<type>. */
+ if (!overloaded_p)
+ {
+ b.append_name (operand_suffixes[instance.op_info->op]);
+ b.append_name ("_se");
+ b.append_name (type_suffixes[instance.type.index].vector);
+ }
+ return b.finish_name ();
+ }
+};
+
+/* sf_vcix_def class. */
+struct sf_vcix_def : public build_base
+{
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ /* Return nullptr if it is overloaded. */
+ if (overloaded_p)
+ return nullptr;
+
+ b.append_base_name (instance.base_name);
+
+    /* vop --> vop_<op>_<type>. */
+ if (!overloaded_p)
+ {
+ b.append_name (operand_suffixes[instance.op_info->op]);
+ b.append_name (type_suffixes[instance.type.index].vector);
+ }
+ return b.finish_name ();
+ }
+};
+
+
SHAPE(vsetvl, vsetvl)
SHAPE(vsetvl, vsetvlmax)
SHAPE(loadstore, loadstore)
@@ -1379,4 +1429,6 @@ SHAPE(crypto_vi, crypto_vi)
SHAPE(crypto_vv_no_op_type, crypto_vv_no_op_type)
SHAPE (sf_vqmacc, sf_vqmacc)
SHAPE (sf_vfnrclip, sf_vfnrclip)
+SHAPE(sf_vcix_se, sf_vcix_se)
+SHAPE(sf_vcix, sf_vcix)
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h b/gcc/config/riscv/riscv-vector-builtins-shapes.h
index 858799b..2f2636e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.h
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.h
@@ -62,6 +62,8 @@ extern const function_shape *const crypto_vv_no_op_type;
/* Sifive vendor extension. */
extern const function_shape *const sf_vqmacc;
extern const function_shape *const sf_vfnrclip;
+extern const function_shape *const sf_vcix_se;
+extern const function_shape *const sf_vcix;
}
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def
index 857b637..ade6644 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -369,6 +369,18 @@ along with GCC; see the file COPYING3. If not see
#define DEF_RVV_XFQF_OPS(TYPE, REQUIRE)
#endif
+/* Use "DEF_RVV_X2_U_OPS" macro include unsigned integer which will
+ be iterated and registered as intrinsic functions. */
+#ifndef DEF_RVV_X2_U_OPS
+#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE)
+#endif
+
+/* Use "DEF_RVV_X2_WU_OPS" macro include widen unsigned integer which will
+ be iterated and registered as intrinsic functions. */
+#ifndef DEF_RVV_X2_WU_OPS
+#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE)
+#endif
+
DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64)
DEF_RVV_I_OPS (vint8mf4_t, 0)
DEF_RVV_I_OPS (vint8mf2_t, 0)
@@ -1463,6 +1475,32 @@ DEF_RVV_XFQF_OPS (vint8mf2_t, 0)
DEF_RVV_XFQF_OPS (vint8m1_t, 0)
DEF_RVV_XFQF_OPS (vint8m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint8mf4_t, 0)
+DEF_RVV_X2_U_OPS (vuint8mf2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m4_t, 0)
+DEF_RVV_X2_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint16mf2_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m4_t, 0)
+DEF_RVV_X2_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint32m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint32m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint32m4_t, 0)
+
+DEF_RVV_X2_WU_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_WU_OPS (vuint16mf2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m1_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m4_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_WU_OPS (vuint32m1_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32m2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32m4_t, 0)
+
#undef DEF_RVV_I_OPS
#undef DEF_RVV_U_OPS
#undef DEF_RVV_F_OPS
@@ -1519,3 +1557,5 @@ DEF_RVV_XFQF_OPS (vint8m2_t, 0)
#undef DEF_RVV_F32_OPS
#undef DEF_RVV_QMACC_OPS
#undef DEF_RVV_XFQF_OPS
+#undef DEF_RVV_X2_U_OPS
+#undef DEF_RVV_X2_WU_OPS
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 61dcdab..f652a12 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -544,6 +544,20 @@ static const rvv_type_info crypto_sew64_ops[] = {
#include "riscv-vector-builtins-types.def"
{NUM_VECTOR_TYPES, 0}};
+/* A list of unsigned integer types that will be registered for Sifive
+   Xsfvcp intrinsic functions.  */
+static const rvv_type_info x2_u_ops[] = {
+#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
+#include "riscv-vector-builtins-types.def"
+ {NUM_VECTOR_TYPES, 0}};
+
+/* A list of widened unsigned integer types that will be registered for
+   Sifive Xsfvcp intrinsic functions.  */
+static const rvv_type_info x2_wu_ops[] = {
+#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
+#include "riscv-vector-builtins-types.def"
+ {NUM_VECTOR_TYPES, 0}};
+
/* A list of signed integer will be registered for intrinsic
* functions. */
static const rvv_type_info qmacc_ops[] = {
@@ -805,7 +819,7 @@ static CONSTEXPR const rvv_arg_type_info bf_wwxv_args[]
static CONSTEXPR const rvv_arg_type_info m_args[]
= {rvv_arg_type_info (RVV_BASE_mask), rvv_arg_type_info_end};
-/* A list of args for vector_type func (scalar_type) function. */
+/* A list of args for vector_type func (scalar_type/sf.vc) function. */
static CONSTEXPR const rvv_arg_type_info x_args[]
= {rvv_arg_type_info (RVV_BASE_scalar), rvv_arg_type_info_end};
@@ -1055,6 +1069,161 @@ static CONSTEXPR const rvv_arg_type_info scalar_ptr_size_args[]
rvv_arg_type_info (RVV_BASE_size), rvv_arg_type_info (RVV_BASE_vector),
rvv_arg_type_info_end};
+/* A list of args for vector_type func (sf.vc.x) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_x_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.x) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_x_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.i) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_i_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+
+/* A list of args for vector_type func (sf.vc.v.i) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_i_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.vv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_vv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.xv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_xv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.iv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_iv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.iv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_iv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.fv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_fv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vvv/sf.vc.v.vvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xvv/sf.vc.v.xvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.ivv/sf.vc.v.ivv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_ivv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_unsigned_vector),
+ rvv_arg_type_info (RVV_BASE_unsigned_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fvv/sf.vc.v.fvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vvw/sf.vc.v.vvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xvw/sf.vc.v.xvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.ivw/sf.vc.v.ivw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_ivw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fvw/sf.vc.v.fvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
/* A list of none preds that will be registered for intrinsic functions. */
static CONSTEXPR const predication_type_index none_preds[]
= {PRED_TYPE_none, NUM_PRED_TYPES};
@@ -3006,6 +3175,174 @@ static CONSTEXPR const rvv_op_info u_vvvv_crypto_sew64_ops
rvv_arg_type_info (RVV_BASE_vector), /* Return type */
vvv_args /* Args */};
+static CONSTEXPR const rvv_op_info sf_vc_x_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_x, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_x_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_x_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_x, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_x_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_i_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_i, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_i_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_i_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_i, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_i_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_vv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_xv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_xv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_xv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_iv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_iv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_iv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_iv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_iv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_iv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_fv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_v_fv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_fv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_vvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_vvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_vvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_xvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_xvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_xvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_ivv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_ivv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_ivv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_ivv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_ivv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_ivv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fvv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_fvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fvv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_v_fvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_fvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_vvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_vvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_vvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_xvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_xvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_xvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_ivw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_ivw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_ivw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_ivw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_ivw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_ivw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fvw_ops
+ = {x2_wu_ops, /* Types */
+ OP_TYPE_fvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fvw_ops
+ = {x2_wu_ops, /* Types */
+ OP_TYPE_v_fvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_fvw_args /* Args */};
+
/* A list of all RVV base function types. */
static CONSTEXPR const function_type_info function_types[] = {
#define DEF_RVV_TYPE_INDEX( \
@@ -3022,7 +3359,7 @@ static CONSTEXPR const function_type_info function_types[] = {
SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \
SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \
UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \
- UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \
+ UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\
X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART) \
{ \
VECTOR_TYPE_##VECTOR, \
@@ -3087,6 +3424,7 @@ static CONSTEXPR const function_type_info function_types[] = {
VECTOR_TYPE_##UNSIGNED_EEW16_LMUL1_INTERPRET, \
VECTOR_TYPE_##UNSIGNED_EEW32_LMUL1_INTERPRET, \
VECTOR_TYPE_##UNSIGNED_EEW64_LMUL1_INTERPRET, \
+ VECTOR_TYPE_##X2, \
VECTOR_TYPE_##X2_VLMUL_EXT, \
VECTOR_TYPE_##X4_VLMUL_EXT, \
VECTOR_TYPE_##X8_VLMUL_EXT, \
@@ -3504,26 +3842,26 @@ check_required_extensions (const function_instance &instance)
required_extensions |= RVV_REQUIRE_RV64BIT;
}
- uint64_t riscv_isa_flags = 0;
+ uint64_t isa_flags = 0;
if (TARGET_VECTOR_ELEN_BF_16)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_BF_16;
+ isa_flags |= RVV_REQUIRE_ELEN_BF_16;
if (TARGET_VECTOR_ELEN_FP_16)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_16;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_16;
if (TARGET_VECTOR_ELEN_FP_32)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_32;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_32;
if (TARGET_VECTOR_ELEN_FP_64)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_64;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_64;
if (TARGET_VECTOR_ELEN_64)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_64;
+ isa_flags |= RVV_REQUIRE_ELEN_64;
if (TARGET_64BIT)
- riscv_isa_flags |= RVV_REQUIRE_RV64BIT;
+ isa_flags |= RVV_REQUIRE_RV64BIT;
if (TARGET_FULL_V)
- riscv_isa_flags |= RVV_REQUIRE_FULL_V;
+ isa_flags |= RVV_REQUIRE_FULL_V;
if (TARGET_MIN_VLEN > 32)
- riscv_isa_flags |= RVV_REQUIRE_MIN_VLEN_64;
+ isa_flags |= RVV_REQUIRE_MIN_VLEN_64;
- uint64_t missing_extensions = required_extensions & ~riscv_isa_flags;
+ uint64_t missing_extensions = required_extensions & ~isa_flags;
if (missing_extensions != 0)
return false;
return true;
@@ -3600,6 +3938,24 @@ rvv_arg_type_info::get_xfqf_float_type (vector_type_index type_idx) const
return NULL_TREE;
}
+tree
+rvv_arg_type_info::get_scalar_float_type (vector_type_index type_idx) const
+{
+ /* Convert vint types to their corresponding scalar float types.
+ Note:
+ - According to riscv-vector-builtins-types.def, the index of an unsigned
+ type is always one greater than its corresponding signed type.
+ - Conversion for vint8 types is not required. */
+ if (type_idx >= VECTOR_TYPE_vint16mf4_t && type_idx <= VECTOR_TYPE_vuint16m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat16m1_t].scalar;
+ else if (type_idx >= VECTOR_TYPE_vint32mf2_t && type_idx <= VECTOR_TYPE_vuint32m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat32m1_t].scalar;
+ else if (type_idx >= VECTOR_TYPE_vint64m1_t && type_idx <= VECTOR_TYPE_vuint64m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat64m1_t].scalar;
+ else
+ return NULL_TREE;
+}
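A hedged sketch of the width-based mapping get_scalar_float_type implements, using plain C type spellings for illustration (in the real code the scalar trees come from builtin_types): 16-bit integer element types map to the scalar of vfloat16m1_t, 32-bit to that of vfloat32m1_t, 64-bit to that of vfloat64m1_t, and 8-bit elements have no mapping.

#include <cstdio>

/* Hypothetical helper: return the C spelling of the scalar float type
   associated with an integer element width, or nullptr if there is none.  */
static const char *
scalar_float_for_element_bits (int bits)
{
  switch (bits)
    {
    case 16: return "_Float16";   /* scalar of vfloat16m1_t (assumed)  */
    case 32: return "float";      /* scalar of vfloat32m1_t  */
    case 64: return "double";     /* scalar of vfloat64m1_t  */
    default: return nullptr;      /* e.g. 8-bit elements  */
    }
}

int
main ()
{
  printf ("%s\n", scalar_float_for_element_bits (32));   /* float  */
  return 0;
}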
+
vector_type_index
rvv_arg_type_info::get_function_type_index (vector_type_index type_idx) const
{
@@ -3758,7 +4114,7 @@ function_instance::modifies_global_state_p () const
return true;
/* Handle direct modifications of global state. */
- return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR);
+ return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR | CP_USE_COPROCESSORS);
}
/* Return true if calls to the function could raise a signal. */
diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def
index 3a62869..be3fb1a 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. If not see
SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \
SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \
UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \
- UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \
+ UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\
X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART)
#endif
@@ -637,6 +637,32 @@ DEF_RVV_OP_TYPE (xu_w)
DEF_RVV_OP_TYPE (s)
DEF_RVV_OP_TYPE (4x8x4)
DEF_RVV_OP_TYPE (2x8x2)
+DEF_RVV_OP_TYPE (v_x)
+DEF_RVV_OP_TYPE (i)
+DEF_RVV_OP_TYPE (v_i)
+DEF_RVV_OP_TYPE (xv)
+DEF_RVV_OP_TYPE (iv)
+DEF_RVV_OP_TYPE (fv)
+DEF_RVV_OP_TYPE (vvv)
+DEF_RVV_OP_TYPE (xvv)
+DEF_RVV_OP_TYPE (ivv)
+DEF_RVV_OP_TYPE (fvv)
+DEF_RVV_OP_TYPE (vvw)
+DEF_RVV_OP_TYPE (xvw)
+DEF_RVV_OP_TYPE (ivw)
+DEF_RVV_OP_TYPE (fvw)
+DEF_RVV_OP_TYPE (v_vv)
+DEF_RVV_OP_TYPE (v_xv)
+DEF_RVV_OP_TYPE (v_iv)
+DEF_RVV_OP_TYPE (v_fv)
+DEF_RVV_OP_TYPE (v_vvv)
+DEF_RVV_OP_TYPE (v_xvv)
+DEF_RVV_OP_TYPE (v_ivv)
+DEF_RVV_OP_TYPE (v_fvv)
+DEF_RVV_OP_TYPE (v_vvw)
+DEF_RVV_OP_TYPE (v_xvw)
+DEF_RVV_OP_TYPE (v_ivw)
+DEF_RVV_OP_TYPE (v_fvw)
DEF_RVV_PRED_TYPE (ta)
DEF_RVV_PRED_TYPE (tu)
@@ -720,6 +746,7 @@ DEF_RVV_BASE_TYPE (unsigned_eew8_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew16_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew32_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew64_lmul1_interpret, get_vector_type (type_idx))
+DEF_RVV_BASE_TYPE (x2_vector, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x2, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x4, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x8, get_vector_type (type_idx))
@@ -729,6 +756,7 @@ DEF_RVV_BASE_TYPE (vlmul_ext_x64, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (size_ptr, build_pointer_type (size_type_node))
DEF_RVV_BASE_TYPE (tuple_subpart, get_tuple_subpart_type (type_idx))
DEF_RVV_BASE_TYPE (xfqf_float, get_xfqf_float_type (type_idx))
+DEF_RVV_BASE_TYPE (scalar_float, get_scalar_float_type (type_idx))
DEF_RVV_VXRM_ENUM (RNU, VXRM_RNU)
DEF_RVV_VXRM_ENUM (RNE, VXRM_RNE)
diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h
index 42ba905..1f2587a 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -130,6 +130,7 @@ enum required_ext
XSFVQMACCQOQ_EXT, /* XSFVQMACCQOQ extension */
XSFVQMACCDOD_EXT, /* XSFVQMACCDOD extension */
XSFVFNRCLIPXFQF_EXT, /* XSFVFNRCLIPXFQF extension */
+  XSFVCP_EXT, /* XSFVCP extension */
/* Please update below to isa_name func when add or remove enum type(s). */
};
@@ -169,6 +170,8 @@ static inline const char * required_ext_to_isa_name (enum required_ext required)
return "xsfvqmaccdod";
case XSFVFNRCLIPXFQF_EXT:
return "xsfvfnrclipxfqf";
+ case XSFVCP_EXT:
+ return "xsfvcp";
default:
gcc_unreachable ();
}
@@ -212,6 +215,8 @@ static inline bool required_extensions_specified (enum required_ext required)
return TARGET_XSFVQMACCDOD;
case XSFVFNRCLIPXFQF_EXT:
return TARGET_XSFVFNRCLIPXFQF;
+ case XSFVCP_EXT:
+ return TARGET_XSFVCP;
default:
gcc_unreachable ();
}
@@ -297,6 +302,7 @@ struct rvv_arg_type_info
tree get_tree_type (vector_type_index) const;
tree get_tuple_subpart_type (vector_type_index) const;
tree get_xfqf_float_type (vector_type_index) const;
+ tree get_scalar_float_type (vector_type_index) const;
};
/* Static information for each operand. */
@@ -325,43 +331,7 @@ struct function_group_info
/* Return true if required extension is enabled */
bool match (required_ext ext_value) const
{
- switch (ext_value)
- {
- case VECTOR_EXT:
- return TARGET_VECTOR;
- case ZVBB_EXT:
- return TARGET_ZVBB;
- case ZVBB_OR_ZVKB_EXT:
- return (TARGET_ZVBB || TARGET_ZVKB);
- case ZVBC_EXT:
- return TARGET_ZVBC;
- case ZVKG_EXT:
- return TARGET_ZVKG;
- case ZVKNED_EXT:
- return TARGET_ZVKNED;
- case ZVKNHA_OR_ZVKNHB_EXT:
- return (TARGET_ZVKNHA || TARGET_ZVKNHB);
- case ZVKNHB_EXT:
- return TARGET_ZVKNHB;
- case ZVKSED_EXT:
- return TARGET_ZVKSED;
- case ZVKSH_EXT:
- return TARGET_ZVKSH;
- case XTHEADVECTOR_EXT:
- return TARGET_XTHEADVECTOR;
- case ZVFBFMIN_EXT:
- return TARGET_ZVFBFMIN;
- case ZVFBFWMA_EXT:
- return TARGET_ZVFBFWMA;
- case XSFVQMACCQOQ_EXT:
- return TARGET_XSFVQMACCQOQ;
- case XSFVQMACCDOD_EXT:
- return TARGET_XSFVQMACCDOD;
- case XSFVFNRCLIPXFQF_EXT:
- return TARGET_XSFVFNRCLIPXFQF;
- default:
- gcc_unreachable ();
- }
+ return required_extensions_specified (ext_value);
}
/* The base name, as a string. */
const char *base_name;
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 167375c..4d8170d 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -205,9 +205,7 @@ compute_local_program_points (
if (!is_gimple_assign_or_call (gsi_stmt (si)))
continue;
stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
- enum stmt_vec_info_type type
- = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (type != undef_vec_info_type)
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
{
stmt_point info = {point, gsi_stmt (si), stmt_info};
program_points.safe_push (info);
@@ -626,7 +624,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode);
if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode))
return regno_alignment;
- else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
+ else
{
int estimated_vf = vect_vf_for_cost (loop_vinfo);
int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
@@ -636,25 +634,6 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
else
return estimated_lmul;
}
- else
- {
- /* Estimate the VLA SLP LMUL. */
- if (regno_alignment > RVV_M1)
- return regno_alignment;
- else if (mode != QImode
- || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ())
- {
- int ratio;
- if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
- GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio))
- {
- if (ratio == 1)
- return RVV_M4;
- else if (ratio == 2)
- return RVV_M2;
- }
- }
- }
return 0;
}
@@ -1120,8 +1099,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
switch (kind)
{
case scalar_to_vec:
- stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
- : costs->regmove->GR2VR);
+ stmt_cost
+ += (FLOAT_TYPE_P (vectype) ? get_fr2vr_cost () : get_gr2vr_cost ());
break;
case vec_to_scalar:
stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 0ac2538..4891b6c 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb)
/* We only do LCM optimizations on blocks that are post dominated by
EXIT block, that is, we don't do LCM optimizations on infinite loop. */
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
- if (e->flags & EDGE_FAKE)
+ if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL))
return true;
return false;
@@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties ()
m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
+ bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
@@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties ()
if (invalid_opt_bb_p (bb->cfg_bb ()))
{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n --- skipping bb %u due to weird edge",
+ bb->index ());
+
bitmap_clear (m_antloc[bb_index]);
bitmap_clear (m_transp[bb_index]);
}
@@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
continue;
}
+ /* We cannot lift a vsetvl into the source block if the block is
+	 not transparent with respect to it.
+ This is too restrictive for blocks where a register's use only
+ feeds into vsetvls and no regular insns. One example is the
+ test rvv/vsetvl/avl_single-68.c which is currently XFAILed for
+ that reason.
+ In order to support this case we'd need to check the vsetvl's
+ AVL operand's uses in the source block and make sure they are
+ only used in other vsetvls. */
+ if (!bitmap_bit_p (m_transp[eg->src->index], expr_index))
+ continue;
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file,
@@ -3402,8 +3419,7 @@ pre_vsetvl::emit_vsetvl ()
}
start_sequence ();
insert_vsetvl_insn (EMIT_DIRECT, footer_info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
+ rtx_insn *rinsn = end_sequence ();
default_rtl_profile ();
insert_insn_on_edge (rinsn, eg);
need_commit = true;
@@ -3434,8 +3450,7 @@ pre_vsetvl::emit_vsetvl ()
start_sequence ();
insert_vsetvl_insn (EMIT_DIRECT, info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
+ rtx_insn *rinsn = end_sequence ();
default_rtl_profile ();
/* We should not get an abnormal edge here. */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 38f3ae7..3c1bb74 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -107,6 +107,8 @@ along with GCC; see the file COPYING3. If not see
/* True the mode switching has static frm, or false. */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)
+#define CFUN_IN_CALL(c) ((c)->machine->mode_sw_info.cfun_call)
+
/* True if we can use the instructions in the XTheadInt extension
to handle interrupts, or false. */
#define TH_INT_INTERRUPT(c) \
@@ -176,10 +178,13 @@ struct GTY(()) mode_switching_info {
mode instruction in the function or not. */
bool static_frm_p;
+ bool cfun_call;
+
mode_switching_info ()
{
dynamic_frm = NULL_RTX;
static_frm_p = false;
+ cfun_call = false;
}
};
@@ -297,6 +302,7 @@ struct riscv_tune_param
bool vector_unaligned_access;
bool use_divmod_expansion;
bool overlap_op_by_pieces;
+ bool speculative_sched_vsetvl;
unsigned int fusible_ops;
const struct cpu_vector_cost *vec_costs;
const char *function_align;
@@ -444,6 +450,29 @@ static const struct cpu_vector_cost generic_vector_cost = {
&rvv_regmove_vector_cost, /* regmove */
};
+/* Costs to use when optimizing for generic. */
+static const struct riscv_tune_param generic_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
+ {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
+ 1, /* issue_rate */
+ 4, /* branch_cost */
+ 5, /* memory_cost */
+ 8, /* fmv_cost */
+ true, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+};
+
/* Costs to use when optimizing for rocket. */
static const struct riscv_tune_param rocket_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
@@ -459,6 +488,7 @@ static const struct riscv_tune_param rocket_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -481,6 +511,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -503,6 +534,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -525,6 +557,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -547,6 +580,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -569,6 +603,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -591,6 +626,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
true, /* vector_unaligned_access */
false, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -613,6 +649,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
true, /* vector_unaligned_access */
true, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -635,6 +672,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -642,6 +680,29 @@ static const struct riscv_tune_param optimize_size_tune_info = {
NULL, /* loop_align */
};
+/* Costs to use when optimizing for MIPS P8700 */
+static const struct riscv_tune_param mips_p8700_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
+ {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */
+ {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* int_mul */
+ {COSTS_N_INSNS (8), COSTS_N_INSNS (8)}, /* int_div */
+ 4, /* issue_rate */
+ 8, /* branch_cost */
+ 4, /* memory_cost */
+ 8, /* fmv_cost */
+ true, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+};
+
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -776,6 +837,16 @@ void riscv_frame_info::reset(void)
arg_pointer_offset = 0;
}
+/* Check if the mode is twice the size of the XLEN mode. */
+
+static bool
+riscv_2x_xlen_mode_p (machine_mode mode)
+{
+ poly_int64 mode_size = GET_MODE_SIZE (mode);
+ return mode_size.is_constant ()
+ && (mode_size.to_constant () == UNITS_PER_WORD * 2);
+}
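A hedged model (plain C++, values illustrative) of the two checks the Zilsd-related code below leans on: a mode qualifies for the paired load/store handling when its size is exactly twice UNITS_PER_WORD, and a hard register pair is directly usable only when the first register number is even.

#include <cstdio>

static bool
is_2x_xlen_mode (int mode_size_bytes, int units_per_word)
{
  return mode_size_bytes == units_per_word * 2;
}

static bool
even_odd_pair_ok (unsigned first_regno)
{
  return (first_regno % 2) == 0;
}

int
main ()
{
  /* DImode (8 bytes) on rv32 (UNITS_PER_WORD == 4) qualifies.  */
  printf ("%d\n", is_2x_xlen_mode (8, 4));                           /* 1 */
  /* A pair starting at x10 is fine, a pair starting at x11 is not.  */
  printf ("%d %d\n", even_odd_pair_ok (10), even_odd_pair_ok (11));  /* 1 0 */
  return 0;
}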
+
/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
static unsigned int
@@ -2856,9 +2927,7 @@ riscv_call_tls_get_addr (rtx sym, rtx result)
gen_int_mode (RISCV_CC_BASE, SImode)));
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
@@ -3742,6 +3811,24 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
return true;
}
+ if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode)
+ && ((REG_P (dest) && MEM_P (src)) || (MEM_P (dest) && REG_P (src)))
+ && can_create_pseudo_p ())
+ {
+ rtx reg = REG_P (dest) ? dest : src;
+ unsigned regno = REGNO (reg);
+      /* ZILSD requires an even-odd register pair; let the register
+	 allocator fix up the constraint if the reg is a hard register
+	 with an odd register number.  */
+ if ((regno < FIRST_PSEUDO_REGISTER)
+ && (regno % 2) != 0)
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (reg));
+ emit_move_insn (tmp, src);
+ emit_move_insn (dest, tmp);
+ return true;
+ }
+ }
+
  /* RISC-V GCC may generate a non-legitimate address because we provide
     patterns to optimize access to PIC local symbols, which can make GCC
     generate an unrecognizable instruction during optimization. */
@@ -3812,7 +3899,7 @@ static int
riscv_binary_cost (rtx x, int single_insns, int double_insns)
{
if (!riscv_v_ext_mode_p (GET_MODE (x))
- && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
+ && riscv_2x_xlen_mode_p (GET_MODE (x)))
return COSTS_N_INSNS (double_insns);
return COSTS_N_INSNS (single_insns);
}
@@ -3851,6 +3938,27 @@ riscv_extend_cost (rtx op, bool unsigned_p)
return COSTS_N_INSNS (2);
}
+/* Return the cost of a vector binary rtx such as add, minus or mult.
+   The scalar2vr_cost is added when one of the operands comes from a
+   VEC_DUPLICATE.  */
+
+static int
+get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
+{
+ gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
+
+ rtx op_0 = XEXP (x, 0);
+ rtx op_1 = XEXP (x, 1);
+
+ if (GET_CODE (op_0) == VEC_DUPLICATE
+ || GET_CODE (op_1) == VEC_DUPLICATE)
+ return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE)
+ return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
+ return COSTS_N_INSNS (1);
+}
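A hedged numeric sketch of how the helper above feeds the vector cost model: a pure vector-vector operation stays at one instruction's cost, while an operation with a VEC_DUPLICATE operand also pays the scalar-to-vector move.  The COSTS_N_INSNS scaling and the example gr2vr value of 2 are assumptions for illustration.

#include <cstdio>

#define COSTS_N_INSNS(n) ((n) * 4)   /* assumed scaling, as in GCC's rtl.h  */

static int
vector_binary_cost (bool has_vec_duplicate, int scalar2vr_cost)
{
  return has_vec_duplicate ? (scalar2vr_cost + 1) * COSTS_N_INSNS (1)
                           : COSTS_N_INSNS (1);
}

int
main ()
{
  printf ("v-v add: %d\n", vector_binary_cost (false, 2));   /* 4  */
  printf ("v-x add: %d\n", vector_binary_cost (true, 2));    /* 12 */
  return 0;
}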
+
/* Implement TARGET_RTX_COSTS. */
#define SINGLE_SHIFT_COST 1
@@ -3863,7 +3971,71 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
Cost Model need to be well analyzed and supported in the future. */
if (riscv_v_ext_mode_p (mode))
{
- *total = COSTS_N_INSNS (1);
+ int gr2vr_cost = get_gr2vr_cost ();
+ int fr2vr_cost = get_fr2vr_cost ();
+ int scalar2vr_cost = FLOAT_MODE_P (GET_MODE_INNER (mode))
+ ? fr2vr_cost : gr2vr_cost;
+
+ switch (outer_code)
+ {
+ case SET:
+ {
+ switch (GET_CODE (x))
+ {
+ case VEC_DUPLICATE:
+ *total = gr2vr_cost * COSTS_N_INSNS (1);
+ break;
+ case IF_THEN_ELSE:
+ {
+ rtx op = XEXP (x, 1);
+
+ switch (GET_CODE (op))
+ {
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+ }
+ break;
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case MULT:
+ case SMAX:
+ case UMAX:
+ case SMIN:
+ case UMIN:
+ {
+ rtx op;
+ rtx op_0 = XEXP (x, 0);
+ rtx op_1 = XEXP (x, 1);
+
+ if (GET_CODE (op = op_0) == MULT
+ || GET_CODE (op = op_1) == MULT)
+ *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
+ else
+ *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
+ }
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+ }
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+
return true;
}
@@ -3883,10 +4055,41 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
*total = COSTS_N_INSNS (1);
return true;
}
+
+ /* Register move for XLEN * 2. */
+ if (TARGET_ZILSD
+ && register_operand (SET_SRC (x), GET_MODE (SET_SRC (x)))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+	  /* We still need two instructions for the move with ZILSD, but
+	     subtract one from the cost so that splitting into subregs is
+	     not preferred.
+	     TODO: Add riscv_tune_param for this.  */
+ *total = COSTS_N_INSNS (2) - 1;
+ return true;
+ }
+
+ /* Load for XLEN * 2. */
+ if (TARGET_ZILSD && MEM_P (SET_SRC (x))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+ /* TODO: Add riscv_tune_param for this. */
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed);
return true;
}
+ /* Store for XLEN * 2. */
+ if (TARGET_ZILSD && MEM_P (SET_DEST (x)) && REG_P (SET_SRC (x))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+ /* TODO: Add riscv_tune_param for this. */
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
/* Otherwise return FALSE indicating we should recurse into both the
SET_DEST and SET_SRC combining the cost of both. */
return false;
@@ -4486,16 +4689,14 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq,
rtx dest = SET_DEST (x);
- /* Do something similar for the moves that are likely to
+ /* Do something similar for the moves that are likely to
turn into NOP moves by the time the register allocator is
- done. These are also side effects of how our sCC expanders
- work. We'll want to check and update LAST_DEST here too. */
- if (last_dest
- && REG_P (dest)
+ done. We don't require src to be something set in this
+ sequence, just a promoted SUBREG. */
+ if (REG_P (dest)
&& GET_MODE (dest) == SImode
&& SUBREG_P (src)
- && SUBREG_PROMOTED_VAR_P (src)
- && REGNO (SUBREG_REG (src)) == REGNO (last_dest))
+ && SUBREG_PROMOTED_VAR_P (src))
{
riscv_if_info.original_cost += COSTS_N_INSNS (1);
riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
@@ -4544,6 +4745,19 @@ riscv_split_64bit_move_p (rtx dest, rtx src)
if (TARGET_64BIT)
return false;
+ /* Zilsd provides load/store with even-odd register pair. */
+ if (TARGET_ZILSD
+ && (((REG_P (dest) && MEM_P (src))
+ || (MEM_P (dest) && REG_P (src)))))
+ {
+ rtx reg = REG_P (dest) ? dest : src;
+ unsigned regno = REGNO (reg);
+      /* GCC may still generate some loads/stores with an odd-even register
+	 pair because of ABI handling, but that's fine; just split them
+	 later.  */
+ if (GP_REG_P (regno))
+ return (regno < FIRST_PSEUDO_REGISTER) && ((regno % 2) != 0);
+ }
+
/* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
if (satisfies_constraint_zfli (src))
return false;
@@ -5259,34 +5473,81 @@ riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
bool
riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
{
- machine_mode mode = GET_MODE (dest);
+ machine_mode dst_mode = GET_MODE (dest);
+ machine_mode cond_mode = GET_MODE (dest);
rtx_code code = GET_CODE (op);
rtx op0 = XEXP (op, 0);
rtx op1 = XEXP (op, 1);
+ /* General note. This is called from the conditional move
+ expander. That simplifies the cases we need to worry about
+ as we know the destination will have the same mode as the
+ true/false arms. Furthermore we know that mode will be
+ DI/SI for rv64 or SI for rv32. */
+
+ /* For some tests, we can easily construct a 0, -1 value
+ which can then be used to synthesize more efficient
+ sequences that don't use zicond. */
+ if ((code == LT || code == GE)
+ && (REG_P (op0) || SUBREG_P (op0))
+ && op1 == CONST0_RTX (GET_MODE (op0)))
+ {
+ /* The code to expand signed division by a power of 2 uses a
+ conditional add by 2^n-1 idiom. It can be more efficiently
+ synthesized without zicond using srai+srli+add.
+
+ But we don't see the constants here. Just a conditional move
+ with registers as the true/false values. So this is a little
+ over-aggressive and can result in a few missed if-conversions. */
+ if ((REG_P (cons) || SUBREG_P (cons))
+ && (REG_P (alt) || SUBREG_P (alt)))
+ return false;
+
+ /* If one value is a nonzero constant and the other value is
+ not a constant, then avoid zicond as more efficient sequences
+ using the splatted sign bit are often possible. */
+ if (CONST_INT_P (alt)
+ && alt != CONST0_RTX (dst_mode)
+ && !CONST_INT_P (cons))
+ return false;
+
+ if (CONST_INT_P (cons)
+ && cons != CONST0_RTX (dst_mode)
+ && !CONST_INT_P (alt))
+ return false;
+
+ /* If we need more special cases, add them here. */
+ }
+
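A hedged scalar illustration (not GCC code) of one splatted-sign-bit sequence the comments above allude to: for a conditional like (x < 0 ? a : 0), broadcasting the sign bit of x into a full-width mask and ANDing it with a replaces the conditional move with srai + and.  The example assumes arithmetic right shift, as on typical targets.

#include <cstdint>
#include <cstdio>

static int64_t
select_if_negative (int64_t x, int64_t a)
{
  int64_t mask = x >> 63;   /* srai: all ones when x < 0, otherwise zero  */
  return a & mask;          /* and: a when x < 0, otherwise 0  */
}

int
main ()
{
  printf ("%lld %lld\n",
          (long long) select_if_negative (-5, 42),
          (long long) select_if_negative (7, 42));   /* 42 0 */
  return 0;
}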
if (((TARGET_ZICOND_LIKE
- || (arith_operand (cons, mode) && arith_operand (alt, mode)))
- && (GET_MODE_CLASS (mode) == MODE_INT))
+ || (arith_operand (cons, dst_mode) && arith_operand (alt, dst_mode)))
+ && GET_MODE_CLASS (dst_mode) == MODE_INT
+ && GET_MODE_CLASS (cond_mode) == MODE_INT)
|| TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
{
machine_mode mode0 = GET_MODE (op0);
machine_mode mode1 = GET_MODE (op1);
- /* An integer comparison must be comparing WORD_MODE objects. We
- must enforce that so that we don't strip away a sign_extension
- thinking it is unnecessary. We might consider using
- riscv_extend_operands if they are not already properly extended. */
+ /* An integer comparison must be comparing WORD_MODE objects.
+ Extend the comparison arguments as necessary. */
if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
|| (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
- return false;
+ riscv_extend_comparands (code, &op0, &op1);
- /* In the fallback generic case use MODE rather than WORD_MODE for
- the output of the SCC instruction, to match the mode of the NEG
+ /* We might have been handed back a SUBREG. Just to make things
+ easy, force it into a REG. */
+ if (!REG_P (op0) && !CONST_INT_P (op0))
+ op0 = force_reg (word_mode, op0);
+ if (!REG_P (op1) && !CONST_INT_P (op1))
+ op1 = force_reg (word_mode, op1);
+
+ /* In the fallback generic case use DST_MODE rather than WORD_MODE
+ for the output of the SCC instruction, to match the mode of the NEG
operation below. The output of SCC is 0 or 1 boolean, so it is
valid for input in any scalar integer mode. */
rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
|| TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
- ? word_mode : mode);
+ ? word_mode : dst_mode);
bool invert = false;
/* Canonicalize the comparison. It must be an equality comparison
@@ -5315,7 +5576,7 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
else
return false;
- op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
+ op = gen_rtx_fmt_ee (invert ? EQ : NE, cond_mode, tmp, const0_rtx);
/* We've generated a new comparison. Update the local variables. */
code = GET_CODE (op);
@@ -5334,10 +5595,10 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
arm of the conditional move. That allows us to support more
cases for extensions which are more general than SFB. But
does mean we need to force CONS into a register at this point. */
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
/* With XTheadCondMov we need to force ALT into a register too. */
- alt = force_reg (mode, alt);
- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
+ alt = force_reg (dst_mode, alt);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
@@ -5346,10 +5607,10 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
if (invert)
std::swap (cons, alt);
- rtx reg1 = gen_reg_rtx (mode);
- rtx reg2 = gen_reg_rtx (mode);
- rtx reg3 = gen_reg_rtx (mode);
- rtx reg4 = gen_reg_rtx (mode);
+ rtx reg1 = gen_reg_rtx (dst_mode);
+ rtx reg2 = gen_reg_rtx (dst_mode);
+ rtx reg3 = gen_reg_rtx (dst_mode);
+ rtx reg4 = gen_reg_rtx (dst_mode);
riscv_emit_unary (NEG, reg1, tmp);
riscv_emit_binary (AND, reg2, reg1, cons);
@@ -5359,48 +5620,52 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
return true;
}
/* 0, reg or 0, imm */
- else if (cons == CONST0_RTX (mode)
- && (REG_P (alt)
- || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
+ else if (cons == CONST0_RTX (dst_mode)
+ && ((REG_P (alt) || SUBREG_P (alt))
+ || (CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode))))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
/* imm, imm */
- else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
- && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
+ else if (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode)
+ && CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
- alt = force_reg (mode, gen_int_mode (t, mode));
+ alt = force_reg (dst_mode, gen_int_mode (t, dst_mode));
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
+ CONST0_RTX (dst_mode),
alt)));
/* CONS might not fit into a signed 12 bit immediate suitable
for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (cons)))
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
riscv_emit_binary (PLUS, dest, dest, cons);
return true;
}
/* imm, reg */
- else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
+ else if (CONST_INT_P (cons)
+ && cons != CONST0_RTX (dst_mode)
+ && (REG_P (alt) || SUBREG_P (alt)))
{
/* Optimize for register value of 0. */
- if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
+ if (code == NE
+ && rtx_equal_p (op0, alt)
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
@@ -5408,47 +5673,51 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- rtx temp1 = gen_reg_rtx (mode);
- rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
+ rtx temp1 = gen_reg_rtx (dst_mode);
+ rtx temp2 = gen_int_mode (-1 * INTVAL (cons), dst_mode);
/* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
suitable for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (temp2)))
- temp2 = force_reg (mode, temp2);
+ temp2 = force_reg (dst_mode, temp2);
if (!SMALL_OPERAND (INTVAL (cons)))
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
riscv_emit_binary (PLUS, temp1, alt, temp2);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
+ CONST0_RTX (dst_mode),
temp1)));
riscv_emit_binary (PLUS, dest, dest, cons);
return true;
}
/* reg, 0 or imm, 0 */
- else if ((REG_P (cons)
- || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
- && alt == CONST0_RTX (mode))
+ else if (((REG_P (cons) || SUBREG_P (cons))
+ || (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode)))
+ && alt == CONST0_RTX (dst_mode))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- cons = force_reg (mode, cons);
- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
+ cons = force_reg (dst_mode, cons);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
/* reg, imm */
- else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
+ else if ((REG_P (cons) || (SUBREG_P (cons)))
+ && CONST_INT_P (alt)
+ && alt != CONST0_RTX (dst_mode))
{
/* Optimize for register value of 0. */
- if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
+ if (code == EQ
+ && rtx_equal_p (op0, cons)
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
@@ -5456,53 +5725,54 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- rtx temp1 = gen_reg_rtx (mode);
- rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
+ rtx temp1 = gen_reg_rtx (dst_mode);
+ rtx temp2 = gen_int_mode (-1 * INTVAL (alt), dst_mode);
/* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
suitable for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (temp2)))
- temp2 = force_reg (mode, temp2);
+ temp2 = force_reg (dst_mode, temp2);
if (!SMALL_OPERAND (INTVAL (alt)))
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
riscv_emit_binary (PLUS, temp1, cons, temp2);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
temp1,
- CONST0_RTX (mode))));
+ CONST0_RTX (dst_mode))));
riscv_emit_binary (PLUS, dest, dest, alt);
return true;
}
/* reg, reg */
- else if (REG_P (cons) && REG_P (alt))
+ else if ((REG_P (cons) || SUBREG_P (cons))
+ && (REG_P (alt) || SUBREG_P (alt)))
{
if (((code == EQ && rtx_equal_p (cons, op0))
|| (code == NE && rtx_equal_p (alt, op0)))
- && op1 == CONST0_RTX (mode))
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
- rtx reg1 = gen_reg_rtx (mode);
- rtx reg2 = gen_reg_rtx (mode);
+ rtx reg1 = gen_reg_rtx (dst_mode);
+ rtx reg2 = gen_reg_rtx (dst_mode);
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
GET_MODE (op0), op0, op1);
emit_insn (gen_rtx_SET (reg2,
- gen_rtx_IF_THEN_ELSE (mode, cond2,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond2,
+ CONST0_RTX (dst_mode),
cons)));
emit_insn (gen_rtx_SET (reg1,
- gen_rtx_IF_THEN_ELSE (mode, cond1,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond1,
+ CONST0_RTX (dst_mode),
alt)));
riscv_emit_binary (PLUS, dest, reg1, reg2);
return true;
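Illustration only, not part of the patch: the reg/reg case above expressed in C for a != comparison, assuming Zicond semantics where czero.eqz zeroes its result when the condition register is zero and czero.nez zeroes it when nonzero:

static long
cmov_ne (long x, long y, long cons, long alt)
{
  long t = (x != y);         /* Comparison reduced to a 0/1 value.  */
  long reg2 = t ? cons : 0;  /* czero.eqz reg2, cons, t  */
  long reg1 = t ? 0 : alt;   /* czero.nez reg1, alt, t   */
  return reg1 + reg2;        /* One addend is always zero, so the add selects.  */
}
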
@@ -6879,6 +7149,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
'T' Print shift-index of inverted single-bit mask OP.
'~' Print w if TARGET_64BIT is true; otherwise not print anything.
'N' Print register encoding as integer (0-31).
+ 'H' Print the name of the next register for integer.
Note please keep this list and the list in riscv.md in sync. */
@@ -7174,6 +7445,27 @@ riscv_print_operand (FILE *file, rtx op, int letter)
asm_fprintf (file, "%u", (regno - offset));
break;
}
+ case 'H':
+ {
+ if (!REG_P (op))
+ {
+ output_operand_lossage ("modifier 'H' requires a register operand");
+ break;
+ }
+ if (REGNO (op) > 31)
+ {
+ output_operand_lossage ("modifier 'H' is for integer registers only");
+ break;
+ }
+ if (REGNO (op) == 31)
+ {
+ output_operand_lossage ("modifier 'H' cannot be applied to R31");
+ break;
+ }
+
+ fputs (reg_names[REGNO (op) + 1], file);
+ break;
+ }
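For illustration (a hypothetical use, not taken from this patch): if operand 0 of an output template is the even register of a Zilsd-style pair, "%0" prints that register and "%H0" prints its odd partner, since the modifier simply emits the name of the next integer register.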
default:
switch (code)
{
@@ -7863,11 +8155,9 @@ riscv_can_inline_p (tree caller, tree callee)
struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
- int isa_flag_mask = riscv_x_target_flags_isa_mask ();
-
- /* Callee and caller should have the same target options except for ISA. */
- int callee_target_flags = callee_opts->x_target_flags & ~isa_flag_mask;
- int caller_target_flags = caller_opts->x_target_flags & ~isa_flag_mask;
+ /* Callee and caller should have the same target options. */
+ int callee_target_flags = callee_opts->x_target_flags;
+ int caller_target_flags = caller_opts->x_target_flags;
if (callee_target_flags != caller_target_flags)
return false;
@@ -9650,27 +9940,27 @@ int
riscv_register_move_cost (machine_mode mode,
reg_class_t from, reg_class_t to)
{
- bool from_is_fpr = from == FP_REGS || from == RVC_FP_REGS;
- bool from_is_gpr = from == GR_REGS || from == RVC_GR_REGS;
- bool to_is_fpr = to == FP_REGS || to == RVC_FP_REGS;
- bool to_is_gpr = to == GR_REGS || to == RVC_GR_REGS;
+ bool from_is_fpr = reg_class_subset_p (from, FP_REGS);
+ bool from_is_gpr = reg_class_subset_p (from, GR_REGS);
+ bool to_is_fpr = reg_class_subset_p (to, FP_REGS);
+ bool to_is_gpr = reg_class_subset_p (to, GR_REGS);
if ((from_is_fpr && to_is_gpr) || (from_is_gpr && to_is_fpr))
return tune_param->fmv_cost;
if (from == V_REGS)
{
- if (to == GR_REGS)
+ if (to_is_gpr)
return get_vector_costs ()->regmove->VR2GR;
- else if (to == FP_REGS)
+ else if (to_is_fpr)
return get_vector_costs ()->regmove->VR2FR;
}
if (to == V_REGS)
{
- if (from == GR_REGS)
- return get_vector_costs ()->regmove->GR2VR;
- else if (from == FP_REGS)
- return get_vector_costs ()->regmove->FR2VR;
+ if (from_is_gpr)
+ return get_gr2vr_cost ();
+ else if (from_is_fpr)
+ return get_fr2vr_cost ();
}
return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
@@ -9746,6 +10036,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
if (riscv_v_ext_mode_p (mode))
return false;
+ /* Zilsd requires load/store with an even-odd reg pair. */
+ if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode) && ((regno % 2) != 0))
+ return false;
+
if (!GP_REG_P (regno + nregs - 1))
return false;
}
@@ -10222,6 +10516,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
return new_cost;
}
+/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if INSN
+ can be scheduled for speculative execution. Reject vsetvl instructions to
+ prevent the scheduler from hoisting them out of basic blocks without
+ checking for data dependencies (PR117974). */
+static bool
+riscv_sched_can_speculate_insn (rtx_insn *insn)
+{
+ /* Gate speculative scheduling of vsetvl instructions behind tune param. */
+ if (tune_param->speculative_sched_vsetvl)
+ return true;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_VSETVL:
+ case TYPE_VSETVL_PRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
/* Auxiliary function to emit RISC-V ELF attribute. */
static void
riscv_emit_attribute ()
@@ -10382,7 +10697,7 @@ riscv_file_end ()
fprintf (asm_out_file, "1:\n");
/* pr_type. */
- fprintf (asm_out_file, "\t.p2align\t3\n");
+ fprintf (asm_out_file, "\t.p2align\t%u\n", p2align);
fprintf (asm_out_file, "2:\n");
fprintf (asm_out_file, "\t.long\t0xc0000000\n");
/* pr_datasz. */
@@ -11528,11 +11843,10 @@ riscv_gpr_save_operation_p (rtx op)
/* Two CLOBBER and USEs, must check the order. */
unsigned expect_code = i < 3 ? CLOBBER : USE;
if (GET_CODE (elt) != expect_code
- || !REG_P (XEXP (elt, 1))
- || (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i]))
+ || !REG_P (XEXP (elt, 0))
+ || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
return false;
}
- break;
}
return true;
}
@@ -12047,27 +12361,30 @@ riscv_emit_frm_mode_set (int mode, int prev_mode)
if (prev_mode == riscv_vector::FRM_DYN_CALL)
emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
- if (mode != prev_mode)
- {
- rtx frm = gen_int_mode (mode, SImode);
+ if (mode == prev_mode)
+ return;
- if (mode == riscv_vector::FRM_DYN_CALL
- && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
- /* No need to emit when prev mode is DYN already. */
- emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
- else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
- && prev_mode != riscv_vector::FRM_DYN
- && prev_mode != riscv_vector::FRM_DYN_CALL)
- /* No need to emit when prev mode is DYN or DYN_CALL already. */
- emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
- else if (mode == riscv_vector::FRM_DYN
- && prev_mode != riscv_vector::FRM_DYN_CALL)
- /* Restore frm value from backup when switch to DYN mode. */
- emit_insn (gen_fsrmsi_restore (backup_reg));
- else if (riscv_static_frm_mode_p (mode))
- /* Set frm value when switch to static mode. */
- emit_insn (gen_fsrmsi_restore (frm));
+ if (riscv_static_frm_mode_p (mode))
+ {
+ /* Set frm value when switch to static mode. */
+ emit_insn (gen_fsrmsi_restore (gen_int_mode (mode, SImode)));
+ return;
}
+
+ bool restore_p
+ = /* No need to emit when prev mode is DYN. */
+ (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_CALL
+ && prev_mode != riscv_vector::FRM_DYN)
+ /* No need to emit if prev mode is DYN or DYN_CALL. */
+ || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_EXIT
+ && prev_mode != riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL)
+ /* Restore frm value when switch to DYN mode. */
+ || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL);
+
+ if (restore_p)
+ emit_insn (gen_fsrmsi_restore (backup_reg));
}
/* Implement Mode switching. */
@@ -12090,59 +12407,6 @@ riscv_emit_mode_set (int entity, int mode, int prev_mode,
}
}
-/* Adjust the FRM_NONE insn after a call to FRM_DYN for the
- underlying emit. */
-
-static int
-riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
-{
- rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
-
- if (insn && CALL_P (insn))
- return riscv_vector::FRM_DYN;
-
- return mode;
-}
-
-/* Insert the backup frm insn to the end of the bb if and only if the call
- is the last insn of this bb. */
-
-static void
-riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
-{
- edge eg;
- bool abnormal_edge_p = false;
- edge_iterator eg_iterator;
- basic_block bb = BLOCK_FOR_INSN (cur_insn);
-
- FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
- {
- if (eg->flags & EDGE_ABNORMAL)
- abnormal_edge_p = true;
- else
- {
- start_sequence ();
- emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
- rtx_insn *backup_insn = get_insns ();
- end_sequence ();
-
- insert_insn_on_edge (backup_insn, eg);
- }
- }
-
- if (abnormal_edge_p)
- {
- start_sequence ();
- emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
- rtx_insn *backup_insn = get_insns ();
- end_sequence ();
-
- insert_insn_end_basic_block (backup_insn, bb);
- }
-
- commit_edge_insertions ();
-}
-
/* Return mode that frm must be switched into
prior to the execution of insn. */
@@ -12154,33 +12418,25 @@ riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
/* The dynamic frm will be initialized only once during cfun. */
DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
+ CFUN_IN_CALL (cfun) = false;
}
if (CALL_P (cur_insn))
{
- rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
-
- if (!insn)
- riscv_frm_emit_after_bb_end (cur_insn);
-
+ CFUN_IN_CALL (cfun) = true;
return riscv_vector::FRM_DYN_CALL;
}
int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
if (mode == riscv_vector::FRM_NONE)
- /* After meet a call, we need to backup the frm because it may be
- updated during the call. Here, for each insn, we will check if
- the previous insn is a call or not. When previous insn is call,
- there will be 2 cases for the emit mode set.
-
- 1. Current insn is not MODE_NONE, then the mode switch framework
- will do the mode switch from MODE_CALL to MODE_NONE natively.
- 2. Current insn is MODE_NONE, we need to adjust the MODE_NONE to
- the MODE_DYN, and leave the mode switch itself to perform
- the emit mode set.
- */
- mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
+ {
+ if (CFUN_IN_CALL (cfun))
+ {
+ CFUN_IN_CALL (cfun) = false;
+ return riscv_vector::FRM_DYN;
+ }
+ }
return mode;
}
@@ -12207,7 +12463,7 @@ singleton_vxrm_need (void)
/* Walk the IL noting if VXRM is needed and if there's more than one
mode needed. */
bool found = false;
- int saved_vxrm_mode;
+ int saved_vxrm_mode = VXRM_MODE_NONE;
for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || DEBUG_INSN_P (insn))
@@ -12480,6 +12736,36 @@ get_vector_costs ()
return costs;
}
+/* Return the cost of the operation that moves data from a GPR to a
+ vector register. It will take the value of --param=gpr2vr-cost if it
+ is provided. Otherwise the default regmove->GR2VR will be returned. */
+
+int
+get_gr2vr_cost ()
+{
+ int cost = get_vector_costs ()->regmove->GR2VR;
+
+ if (gpr2vr_cost != GPR2VR_COST_UNPROVIDED)
+ cost = gpr2vr_cost;
+
+ return cost;
+}
+
+/* Return the cost of moving data from floating-point to vector register.
+ It will take the value of --param=fpr2vr-cost if it is provided.
+ Otherwise the default regmove->FR2VR will be returned. */
+
+int
+get_fr2vr_cost ()
+{
+ int cost = get_vector_costs ()->regmove->FR2VR;
+
+ if (fpr2vr_cost != FPR2VR_COST_UNPROVIDED)
+ cost = fpr2vr_cost;
+
+ return cost;
+}
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
@@ -12545,8 +12831,7 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
/* TODO: This is too pessimistic in case we can splat. */
- int regmove_cost = fp ? costs->regmove->FR2VR
- : costs->regmove->GR2VR;
+ int regmove_cost = fp ? get_fr2vr_cost () : get_gr2vr_cost ();
return (regmove_cost + common_costs->scalar_to_vec_cost)
* estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
}
@@ -13136,9 +13421,6 @@ parse_features_for_version (tree decl,
DECL_SOURCE_LOCATION (decl));
gcc_assert (parse_res);
- if (arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) arch_string));
-
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
}
@@ -13735,7 +14017,6 @@ riscv_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -13752,41 +14033,16 @@ riscv_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
-
- while (default_version_info != NULL)
- {
- struct riscv_feature_bits res;
- int priority; /* Unused. */
- parse_features_for_version (default_version_info->this_node->decl,
- res, priority);
- if (res.length == 0)
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
if (targetm.has_ifunc_p ())
{
struct cgraph_function_version_info *it_v = NULL;
@@ -13930,17 +14186,53 @@ expand_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx data = gen_rtx_ZERO_EXTEND (word_mode, operands[2]);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
- gen_int_mode (crc_size, word_mode));
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
+ gen_int_mode (crc_size, word_mode));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (LSHIFTRT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
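Illustration only, not part of the patch: what the clmul/clmulh operations used by both the scalar and vector paths above compute, modeled in plain C.  clmul is the low half and clmulh the high half of a 64x64 -> 128-bit carry-less product:

#include <stdint.h>

static uint64_t
clmul64 (uint64_t a, uint64_t b)
{
  uint64_t lo = 0;
  for (int i = 0; i < 64; i++)
    if ((b >> i) & 1)
      lo ^= a << i;          /* XOR instead of ADD: no carries.  */
  return lo;
}

static uint64_t
clmulh64 (uint64_t a, uint64_t b)
{
  uint64_t hi = 0;
  for (int i = 1; i < 64; i++)
    if ((b >> i) & 1)
      hi ^= a >> (64 - i);   /* Bits that overflow past bit 63.  */
  return hi;
}
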
if (crc_size > data_size)
{
@@ -13989,19 +14281,53 @@ expand_reversed_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx a0 = gen_reg_rtx (word_mode);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- rtx num_shift = gen_int_mode (GET_MODE_BITSIZE (word_mode) - data_size,
- word_mode);
- riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
+ rtx num_shift = gen_int_mode (BITS_PER_WORD - data_size, word_mode);
+ riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (BITS_PER_WORD - data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (ASHIFT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
if (crc_size > data_size)
{
@@ -14035,6 +14361,427 @@ bool need_shadow_stack_push_pop_p ()
return is_zicfiss_p () && riscv_save_return_addr_reg_p ();
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] CODE OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ CODE is IOR or XOR.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_ior_xor (rtx_code code, rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2]))
+ || ((TARGET_ZBS || TARGET_ZBKB)
+ && single_bit_mask_operand (operands[2], word_mode)))
+ return false;
+
+ /* The number of instructions to synthesize the constant is a good
+ estimate of the budget. That does not account for out-of-order
+ execution and fusion in the constant synthesis, which would naturally
+ decrease the budget. It also does not account for the IOR/XOR at
+ the end of the sequence, which would increase the budget. */
+ int budget = (TARGET_ZBS ? riscv_const_insns (operands[2], true) : -1);
+ int original_budget = budget;
+
+ /* Bits we need to set in operands[0]. As we synthesize the operation,
+ we clear bits in IVAL. Once IVAL is zero, then synthesis of the
+ operation is complete. */
+ unsigned HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* Check if we want to use [x]ori. Then get the remaining bits
+ and decrease the budget by one. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ budget--;
+ }
+
+ /* Check for bseti cases. For each remaining bit in ival,
+ decrease the budget by one. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ ival &= ~tmpval;
+ budget--;
+ }
+
+ /* If we're flipping all but a small number of bits we can pre-flip
+ the outliers, then flip all the bits, which would restore those
+ bits that were pre-flipped. */
+ if ((TARGET_ZBS || TARGET_ZBKB)
+ && budget < 0
+ && code == XOR
+ && popcount_hwi (~INTVAL (operands[2])) < original_budget)
+ {
+ /* Pre-flipping bits we want to preserve. */
+ rtx input = operands[1];
+ rtx output = NULL_RTX;
+ ival = ~INTVAL (operands[2]);
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_XOR (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ gcc_assert (output);
+
+ /* Now flip all the bits, which restores the bits we were
+ preserving. */
+ rtx x = gen_rtx_NOT (word_mode, input);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
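Illustration only, not part of the patch: the pre-flip trick above on a concrete value, assuming a 64-bit unsigned long.  For x ^ 0xffffffffffffff0ful only bits 4..7 are preserved, so flip those first and then invert everything:

static unsigned long
xor_all_but_nibble (unsigned long x)
{
  x ^= 0x10;   /* binvi/xori, one per preserved bit ...  */
  x ^= 0x20;
  x ^= 0x40;
  x ^= 0x80;
  return ~x;   /* ... then a single not flips the rest.  */
}
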
+ /* One more approach we can try. If our budget is 3+ instructions,
+ then we can try to rotate the source so that the bits we want to
+ set are in the low 11 bits. We then use [x]ori to set those low
+ bits, then rotate things back into their proper place. */
+ if ((TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB)
+ && budget < 0
+ && popcount_hwi (INTVAL (operands[2])) <= 11
+ && riscv_const_insns (operands[2], true) >= 3)
+ {
+ ival = INTVAL (operands[2]);
+ /* First see if the constant trivially fits into 11 bits in the LSB. */
+ int lsb = ctz_hwi (ival);
+ int msb = BITS_PER_WORD - 1 - clz_hwi (ival);
+ if (msb - lsb + 1 <= 11)
+ {
+ rtx output = gen_reg_rtx (word_mode);
+ rtx input = operands[1];
+
+ /* Rotate the source right by LSB bits. */
+ rtx x = GEN_INT (lsb);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift the constant right by LSB bits. */
+ x = GEN_INT (ival >> lsb);
+
+ /* Perform the IOR/XOR operation. */
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And rotate left to put everything back in place. We don't
+ have a rotate-left-by-constant instruction, so use rotate right
+ by an adjusted constant. */
+ x = GEN_INT (BITS_PER_WORD - lsb);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Maybe the bits are split between the high and low parts
+ of the constant. A bit more complex, but still manageable.
+
+ Conceptually we want to rotate left the constant by the number
+ of leading zeros after masking off all but the low 11 bits. */
+ int rotcount = clz_hwi (ival & 0x7ff) - (BITS_PER_WORD - 11);
+
+ /* Rotate the constant left by MSB bits. */
+ ival = (ival << rotcount) | (ival >> (BITS_PER_WORD - rotcount));
+
+ /* Now we can do the same tests as before. */
+ lsb = ctz_hwi (ival);
+ msb = BITS_PER_WORD - clz_hwi (ival);
+ if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0
+ && msb - lsb + 1 <= 11)
+ {
+ rtx output = gen_reg_rtx (word_mode);
+ rtx input = operands[1];
+
+ /* Rotate the source left by ROTCOUNT bits. We don't have a
+ rotate-left-by-constant instruction, so use rotate right by an
+ adjusted constant. */
+ rtx x = GEN_INT (BITS_PER_WORD - rotcount);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* We've already rotated the constant. So perform the IOR/XOR
+ operation. */
+ x = GEN_INT (ival);
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And rotate right to put everything into its proper place. */
+ x = GEN_INT (rotcount);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+ }
+
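Illustration only, not part of the patch and independent of the exact budget checks: the rotate/[x]ori/rotate idea above on a concrete constant, assuming a 64-bit unsigned long and writing each rotate as a shift pair.  To OR in bits 40..45 (0x3f0000000000), rotate them down into the low 11 bits, use ori, then rotate back:

static unsigned long
ior_high_run (unsigned long x)
{
  x = (x >> 40) | (x << 24);     /* rori x, x, 40  */
  x |= 0x3f;                     /* ori  x, x, 0x3f  */
  return (x >> 24) | (x << 40);  /* rori x, x, 24, i.e. rotate left by 40  */
}
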
+ /* If after accounting for bseti the remaining budget has
+ dropped below zero, force the value into a register and
+ perform the IOR/XOR operation. Return TRUE to the caller
+ so the caller knows code generation is complete. */
+ if (budget < 0)
+ {
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_fmt_ee (code, word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Synthesis is better than loading the constant. */
+ ival = INTVAL (operands[2]);
+ rtx input = operands[1];
+ rtx output = NULL_RTX;
+
+ /* Emit the [x]ori insn that sets the low 11 bits into
+ the proper state. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff));
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ }
+
+ /* Pick off one bit at a time, generating a CONST_INT node for
+ it. Then construct the IOR/XOR node, then the SET node, and
+ emit it. An IOR with a suitable single-bit constant will be
+ implemented with a bseti. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ gcc_assert (output);
+ emit_move_insn (operands[0], output);
+ return true;
+}
+
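Illustration only, not part of the patch: the [x]ori + bseti decomposition above for x | 0x100000801, assuming a 64-bit unsigned long.  The low 11 bits of the constant are handled by one ori; each remaining bit gets one bseti:

static unsigned long
ior_example (unsigned long x)
{
  x |= 0x1;         /* ori   x, x, 1   (low 11 bits of the constant)  */
  x |= 1ul << 11;   /* bseti x, x, 11  */
  x |= 1ul << 32;   /* bseti x, x, 32  */
  return x;
}
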
+/* Synthesize OPERANDS[0] = OPERANDS[1] & OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_and (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2]))
+ || (TARGET_ZBS && not_single_bit_mask_operand (operands[2], word_mode)))
+ return false;
+
+ /* If the second operand is a mode mask, emit an extension
+ insn instead. */
+ if (CONST_INT_P (operands[2]))
+ {
+ enum machine_mode tmode = VOIDmode;
+ if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode))
+ tmode = HImode;
+ else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode))
+ tmode = SImode;
+
+ if (tmode != VOIDmode)
+ {
+ rtx tmp = gen_lowpart (tmode, operands[1]);
+ emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1));
+ return true;
+ }
+ }
+
+ /* The number of instructions to synthesize the constant is a good
+ estimate of the budget. That does not account for out-of-order
+ execution and fusion in the constant synthesis, which would naturally
+ decrease the budget. It also does not account for the AND at
+ the end of the sequence, which would increase the budget. */
+ int budget = riscv_const_insns (operands[2], true);
+ rtx input = NULL_RTX;
+ rtx output = NULL_RTX;
+
+ /* Left shift + right shift to clear high bits. */
+ if (budget >= 2 && p2m1_shift_operand (operands[2], word_mode))
+ {
+ int count = (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant ()
+ - exact_log2 (INTVAL (operands[2]) + 1));
+ rtx x = gen_rtx_ASHIFT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
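Illustration only, not part of the patch: the shift-pair idea above, assuming a 64-bit unsigned long.  For a 2^n-1 mask such as x & ((1ul << 34) - 1), the high bits can be cleared with two shifts instead of synthesizing the constant:

static unsigned long
and_low34 (unsigned long x)
{
  x <<= 30;        /* slli: push bits 34..63 out the top.  */
  return x >> 30;  /* srli: bring bits 0..33 back into place.  */
}
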
+
+ /* Clear a run of low bits when the mask has only high bits set. */
+ unsigned HOST_WIDE_INT t = ~INTVAL (operands[2]);
+ if (budget >= 2 && exact_log2 (t + 1) >= 0)
+ {
+ int count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
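Illustration only, not part of the patch: the reverse case above, assuming a 64-bit unsigned long.  When the mask's complement is 2^k-1, e.g. x & ~0xffful, the low bits can be cleared with two shifts:

static unsigned long
and_clear_low12 (unsigned long x)
{
  x >>= 12;        /* srli: drop the low 12 bits.  */
  return x << 12;  /* slli: shift back, leaving zeros below bit 12.  */
}
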
+
+ /* If we shift right to eliminate the trailing zeros and
+ the result is a SMALL_OPERAND, then it's a shift right,
+ andi and shift left. */
+ t = INTVAL (operands[2]);
+ t >>= ctz_hwi (t);
+ if (budget >= 3 && SMALL_OPERAND (t) && popcount_hwi (t) > 2)
+ {
+ /* Shift right to clear the low order bits. */
+ unsigned HOST_WIDE_INT count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Now emit the ANDI. */
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ mask >>= ctz_hwi (mask);
+ x = gen_rtx_AND (word_mode, input, GEN_INT (mask));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift left to move bits into position. */
+ count = INTVAL (operands[2]);
+ count = ctz_hwi (count);
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
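Illustration only, not part of the patch: the shift/andi/shift sequence above on a concrete mask, assuming a 64-bit unsigned long.  For x & 0x1500000 the mask is 0x15 << 20, and 0x15 fits an andi immediate:

static unsigned long
and_shifted_small (unsigned long x)
{
  x >>= 20;        /* srli: align the mask's lowest set bit with bit 0.  */
  x &= 0x15;       /* andi  */
  return x << 20;  /* slli: move the surviving bits back into place.  */
}
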
+
+ /* If the constant is all zeros except for a run of 1s somewhere in
+ the middle, then this is at worst 3 shifts. */
+ t = INTVAL (operands[2]);
+ if (budget >= 3
+ && consecutive_bits_operand (GEN_INT (t), word_mode)
+ && popcount_hwi (t) > 3)
+ {
+ /* Shift right to clear the low order bits. */
+ int count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift left to clear the high order bits. */
+ count += clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD;
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And shift back right to put the bits into position. */
+ count = clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD;
+ x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
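Illustration only, not part of the patch: the three-shift sequence above for a run of ones in the middle of the word, assuming a 64-bit unsigned long.  For x & 0x000000ffff000000 (bits 24..39):

static unsigned long
and_middle_run (unsigned long x)
{
  x >>= 24;        /* srli: remove the 24 trailing zeros of the mask.  */
  x <<= 48;        /* slli: discard everything above the 16-bit run.  */
  return x >> 24;  /* srli: drop the run back to bits 24..39.  */
}
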
+
+ /* The special cases didn't apply. It's entirely possible we may
+ want to combine some of the ideas above with bclr, but for now
+ those are deferred until we see them popping up in practice. */
+
+ unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]);
+
+ /* Clear as many bits using andi as we can. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0)
+ {
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ budget--;
+ }
+
+ /* And handle remaining bits via bclr. */
+ while (TARGET_ZBS && ival)
+ {
+ unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ ival &= ~tmpval;
+ budget--;
+ }
+
+ /* If the remaining budget has dropped below zero, force the
+ value into a register and perform the AND operation. Return
+ TRUE to the caller so the caller knows code generation is
+ complete.
+ FIXME: This is hacked to always be enabled until the last
+ patch in the series is enabled. */
+ if (ival || budget < 0)
+ {
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_AND (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Synthesis is better than loading the constant. */
+ ival = ~INTVAL (operands[2]);
+ input = operands[1];
+
+ /* Clear any of the lower 11 bits we need. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff)));
+ x = gen_rtx_AND (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ }
+
+ /* Clear the rest with bclr. */
+ while (ival)
+ {
+ unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (~tmpval);
+ x = gen_rtx_AND (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ emit_move_insn (operands[0], input);
+ return true;
+}
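Illustration only, not part of the patch: the andi + bclr decomposition above for x & ~0x100000801ul, assuming a 64-bit unsigned long.  The low 11 bits of the complement are cleared with one andi; each remaining bit gets a bclri:

static unsigned long
and_clear_bits (unsigned long x)
{
  x &= ~0x1ul;         /* andi  x, x, -2  */
  x &= ~(1ul << 11);   /* bclri x, x, 11  */
  x &= ~(1ul << 32);   /* bclri x, x, 32  */
  return x;
}
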
+
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -14068,6 +14815,9 @@ bool need_shadow_stack_push_pop_p ()
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
+#undef TARGET_SCHED_CAN_SPECULATE_INSN
+#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
+
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 2bcabd0..45fa521 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. If not see
#endif
#ifndef RISCV_TUNE_STRING_DEFAULT
-#define RISCV_TUNE_STRING_DEFAULT "rocket"
+#define RISCV_TUNE_STRING_DEFAULT "generic"
#endif
extern const char *riscv_expand_arch (int argc, const char **argv);
@@ -888,7 +888,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
(PTR) = riscv_asm_output_opcode(STREAM, PTR)
-#define JUMP_TABLES_IN_TEXT_SECTION 0
+#define JUMP_TABLES_IN_TEXT_SECTION (riscv_cmodel == CM_LARGE)
#define CASE_VECTOR_MODE SImode
#define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 26a247c..3406b50 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -495,6 +495,8 @@
;; SiFive custom extension instructions
;; sf_vqmacc vector matrix integer multiply-add instructions
;; sf_vfnrclip vector fp32 to int8 ranged clip instructions
+;; sf_vc vector coprocessor interface without side effect
+;; sf_vc_se vector coprocessor interface with side effect
(define_attr "type"
"unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
@@ -516,7 +518,8 @@
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
- vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16"
+ vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+ sf_vc,sf_vc_se"
(cond [(eq_attr "got" "load") (const_string "load")
;; If a doubleword move uses these expensive instructions,
@@ -669,7 +672,7 @@
;; Microarchitectures we know how to tune for.
;; Keep this in sync with enum riscv_microarchitecture.
(define_attr "tune"
- "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo"
+ "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700"
(const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
;; Describe a user's asm statement.
@@ -789,7 +792,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_addsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -799,7 +802,10 @@
else
t5 = operands[2];
emit_insn (gen_adddi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -835,8 +841,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_addsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
}
@@ -875,7 +884,7 @@
;; Where C1 is not a LUI operand, but ~C1 is a LUI operand
(define_insn_and_split "*lui_constraint<X:mode>_and_to_or"
- [(set (match_operand:X 0 "register_operand" "=r")
+ [(set (match_operand:X 0 "register_operand" "=r")
(plus:X (and:X (match_operand:X 1 "register_operand" "r")
(match_operand 2 "const_int_operand"))
(match_operand 3 "const_int_operand")))
@@ -889,13 +898,21 @@
<= riscv_const_insns (operands[3], false)))"
"#"
"&& reload_completed"
- [(set (match_dup 4) (match_dup 5))
- (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4)))
- (set (match_dup 4) (match_dup 6))
- (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
+ [(const_int 0)]
{
operands[5] = GEN_INT (~INTVAL (operands[2]));
operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3])));
+
+ /* This is always a LUI operand, so it's safe to just emit. */
+ emit_move_insn (operands[4], operands[5]);
+
+ rtx x = gen_rtx_IOR (word_mode, operands[1], operands[4]);
+ emit_move_insn (operands[0], x);
+
+ /* This may require multiple steps to synthesize. */
+ riscv_emit_move (operands[4], operands[6]);
+ x = gen_rtx_MINUS (word_mode, operands[0], operands[4]);
+ emit_move_insn (operands[0], x);
}
[(set_attr "type" "arith")])
@@ -966,7 +983,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_subsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -976,7 +993,10 @@
else
t5 = operands[2];
emit_insn (gen_subdi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -1015,8 +1035,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_subsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
}
@@ -1709,26 +1732,11 @@
(define_expand "and<mode>3"
[(set (match_operand:X 0 "register_operand")
(and:X (match_operand:X 1 "register_operand")
- (match_operand:X 2 "arith_or_mode_mask_or_zbs_operand")))]
+ (match_operand:X 2 "reg_or_const_int_operand")))]
""
{
- /* If the second operand is a mode mask, emit an extension
- insn instead. */
- if (CONST_INT_P (operands[2]))
- {
- enum machine_mode tmode = VOIDmode;
- if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode))
- tmode = HImode;
- else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode))
- tmode = SImode;
-
- if (tmode != VOIDmode)
- {
- rtx tmp = gen_lowpart (tmode, operands[1]);
- emit_insn (gen_extend_insn (operands[0], tmp, <MODE>mode, tmode, 1));
- DONE;
- }
- }
+ if (CONST_INT_P (operands[2]) && synthesize_and (operands))
+ DONE;
})
(define_insn "*and<mode>3"
@@ -1752,8 +1760,15 @@
(define_expand "<optab><mode>3"
[(set (match_operand:X 0 "register_operand")
(any_or:X (match_operand:X 1 "register_operand" "")
- (match_operand:X 2 "arith_or_zbs_operand" "")))]
- "")
+ (match_operand:X 2 "reg_or_const_int_operand" "")))]
+ ""
+
+{
+ /* If synthesis of the logical op is successful, then no further code
+ generation is necessary. Else just generate code normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_ior_xor (<OPTAB>, operands))
+ DONE;
+})
(define_insn "*<optab><mode>3"
[(set (match_operand:X 0 "register_operand" "=r,r")
@@ -2494,8 +2509,8 @@
})
(define_insn "*movdi_32bit"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m, *f,*f,*r,*f,*m,r")
- (match_operand:DI 1 "move_operand" " r,i,m,r,*J*r,*m,*f,*f,*f,vp"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, *f,*f,*r,*f,*m,r")
+ (match_operand:DI 1 "move_operand" " r,i,m,rJ,*J*r,*m,*f,*f,*f,vp"))]
"!TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
@@ -2922,7 +2937,7 @@
[(set_attr "type" "shift")
(set_attr "mode" "DI")])
-(define_insn_and_split "*<optab><GPR:mode>3_mask_1"
+(define_insn "*<optab><GPR:mode>3_mask_1"
[(set (match_operand:GPR 0 "register_operand" "= r")
(any_shift:GPR
(match_operand:GPR 1 "register_operand" " r")
@@ -2931,12 +2946,14 @@
(match_operand:GPR2 2 "register_operand" "r")
(match_operand 3 "<GPR:shiftm1>"))])))]
""
- "#"
- "&& 1"
- [(set (match_dup 0)
- (any_shift:GPR (match_dup 1)
- (match_dup 2)))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+{
+ /* If the shift mode is not word mode, then it must be the
+ case that we're generating rv64 code, but this is a 32-bit
+ operation. Thus we need to use the "w" variant. */
+ if (E_<GPR:MODE>mode != word_mode)
+ return "<insn>w\t%0,%1,%2";
+ return "<insn>\t%0,%1,%2";
+}
[(set_attr "type" "shift")
(set_attr "mode" "<GPR:MODE>")])
@@ -2955,7 +2972,7 @@
[(set_attr "type" "shift")
(set_attr "mode" "SI")])
-(define_insn_and_split "*<optab>si3_extend_mask"
+(define_insn "*<optab>si3_extend_mask"
[(set (match_operand:DI 0 "register_operand" "= r")
(sign_extend:DI
(any_shift:SI
@@ -2965,13 +2982,7 @@
(match_operand:GPR 2 "register_operand" " r")
(match_operand 3 "const_si_mask_operand"))]))))]
"TARGET_64BIT"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (sign_extend:DI
- (any_shift:SI (match_dup 1)
- (match_dup 2))))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<insn>w\t%0,%1,%2"
[(set_attr "type" "shift")
(set_attr "mode" "SI")])
@@ -3169,15 +3180,25 @@
"#"
"&& reload_completed"
[(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6)))
- (set (match_dup 4) (and:X (match_dup 4) (match_dup 7)))
+ (set (match_dup 4) (match_dup 8))
(set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
(label_ref (match_dup 0)) (pc)))]
{
- HOST_WIDE_INT mask = INTVAL (operands[3]);
- int trailing = ctz_hwi (mask);
+ HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int trailing = ctz_hwi (mask);
+
+ operands[6] = GEN_INT (trailing);
+ operands[7] = GEN_INT (mask >> trailing);
- operands[6] = GEN_INT (trailing);
- operands[7] = GEN_INT (mask >> trailing);
+ /* This splits after reload, so there's little chance to clean things
+ up. Rather than emit a ton of RTL here, we can just make a new
+ operand for that RHS and use it. For the case where the AND would
+ have been redundant, we can make it a NOP move, which does get
+ cleaned up. */
+ if (operands[7] == CONSTM1_RTX (word_mode))
+ operands[8] = operands[4];
+ else
+ operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]);
}
[(set_attr "type" "branch")])
@@ -4381,7 +4402,7 @@
)
(define_insn "prefetch"
- [(prefetch (match_operand 0 "address_operand" "r")
+ [(prefetch (match_operand 0 "prefetch_operand" "Q")
(match_operand 1 "imm5_operand" "i")
(match_operand 2 "const_int_operand" "n"))]
"TARGET_ZICBOP"
@@ -4401,7 +4422,7 @@
(const_string "4")))])
(define_insn "riscv_prefetchi_<mode>"
- [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r")
+ [(unspec_volatile:X [(match_operand:X 0 "prefetch_operand" "Q")
(match_operand:X 1 "imm5_operand" "i")]
UNSPECV_PREI)]
"TARGET_ZICBOP"
@@ -4691,23 +4712,38 @@
(match_operand 2 "const_int_operand" "n"))
(match_operand 3 "const_int_operand" "n")))
(clobber (match_scratch:DI 4 "=&r"))]
- "(TARGET_64BIT && riscv_const_insns (operands[3], false) == 1)"
+ "(TARGET_64BIT
+ && riscv_const_insns (operands[3], false) == 1
+ && riscv_const_insns (GEN_INT (INTVAL (operands[3])
+ << INTVAL (operands[2])), false) != 1)"
"#"
"&& reload_completed"
[(const_int 0)]
"{
- rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
- emit_insn (gen_rtx_SET (operands[0], x));
-
- /* If the constant fits in a simm12, use it directly as we do not
- get another good chance to optimize things again. */
- if (!SMALL_OPERAND (INTVAL (operands[3])))
+ /* Prefer to generate shNadd when we can, even over using an
+ immediate form. If we're not going to be able to generate
+ a shNadd, then use the constant directly if it fits in a
+ simm12 field since we won't get another chance to optimize this. */
+ if ((TARGET_ZBA && imm123_operand (operands[2], word_mode))
+ || !SMALL_OPERAND (INTVAL (operands[3])))
emit_move_insn (operands[4], operands[3]);
else
operands[4] = operands[3];
- x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
- emit_insn (gen_rtx_SET (operands[0], x));
+ if (TARGET_ZBA && imm123_operand (operands[2], word_mode))
+ {
+ rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+ x = gen_rtx_PLUS (DImode, x, operands[4]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+
DONE;
}"
[(set_attr "type" "arith")])
@@ -4806,6 +4842,24 @@
[(set_attr "type" "move")
(set_attr "mode" "<MODE>")])
+;; If we're trying to create 0 or 2^n-1 based on the result of
+;; a test such as (lt (reg) (const_int 0)), we'll see a splat of
+;; the sign bit across a GPR using srai, then a logical and to
+;; mask off high bits. We can replace the logical and with
+;; a logical right shift which works without constant synthesis
+;; for larger constants.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ashiftrt:X (match_operand:X 1 "register_operand")
+ (match_operand 2 "const_int_operand"))
+ (match_operand 3 "const_int_operand")))]
+ "(INTVAL (operands[2]) == BITS_PER_WORD - 1
+ && exact_log2 (INTVAL (operands[3]) + 1) >= 0)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (lshiftrt:X (match_dup 0) (match_dup 3)))]
+ { operands[3] = GEN_INT (BITS_PER_WORD
+ - exact_log2 (INTVAL (operands[3]) + 1)); })
+
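Illustration only, not part of the patch: the split above in C, assuming a 64-bit long.  (x < 0) ? 0xffff : 0 starts as srai plus an AND with 0xffff; the AND can instead be a logical right shift, which needs no constant synthesis:

static unsigned long
splat_then_mask (long x)
{
  unsigned long t = (unsigned long) (x >> 63);  /* srai 63: 0 or all-ones.  */
  return t >> 48;                               /* srli 48: 0 or 0xffff.    */
}
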
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")
@@ -4828,3 +4882,4 @@
(include "zc.md")
(include "corev.md")
(include "xiangshan.md")
+(include "mips-p8700.md")
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8e..6543fd1 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -168,23 +168,14 @@ momit-leaf-frame-pointer
Target Mask(OMIT_LEAF_FRAME_POINTER) Save
Omit the frame pointer in leaf functions.
-Mask(64BIT)
-
-Mask(MUL)
-
-Mask(ATOMIC)
-
-Mask(HARD_FLOAT)
-
-Mask(DOUBLE_FLOAT)
-
-Mask(RVC)
+TargetVariable
+int riscv_isa_flags
-Mask(RVE)
+Mask(64BIT) Var(riscv_isa_flags)
-Mask(VECTOR)
+Mask(VECTOR) Var(riscv_isa_flags)
-Mask(FULL_V)
+Mask(FULL_V) Var(riscv_isa_flags)
mriscv-attribute
Target Var(riscv_emit_attribute_p) Init(-1)
@@ -233,93 +224,6 @@ TargetVariable
long riscv_stack_protector_guard_offset = 0
TargetVariable
-int riscv_zi_subext
-
-Mask(ZICSR) Var(riscv_zi_subext)
-
-Mask(ZIFENCEI) Var(riscv_zi_subext)
-
-Mask(ZIHINTNTL) Var(riscv_zi_subext)
-
-Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
-
-Mask(ZICOND) Var(riscv_zi_subext)
-
-Mask(ZICCAMOA) Var(riscv_zi_subext)
-
-Mask(ZICCIF) Var(riscv_zi_subext)
-
-Mask(ZICCLSM) Var(riscv_zi_subext)
-
-Mask(ZICCRSE) Var(riscv_zi_subext)
-
-Mask(ZICFISS) Var(riscv_zi_subext)
-
-Mask(ZICFILP) Var(riscv_zi_subext)
-
-TargetVariable
-int riscv_za_subext
-
-Mask(ZAWRS) Var(riscv_za_subext)
-
-Mask(ZAAMO) Var(riscv_za_subext)
-
-Mask(ZALRSC) Var(riscv_za_subext)
-
-Mask(ZABHA) Var(riscv_za_subext)
-
-Mask(ZACAS) Var(riscv_za_subext)
-
-Mask(ZA64RS) Var(riscv_za_subext)
-
-Mask(ZA128RS) Var(riscv_za_subext)
-
-TargetVariable
-int riscv_zb_subext
-
-Mask(ZBA) Var(riscv_zb_subext)
-
-Mask(ZBB) Var(riscv_zb_subext)
-
-Mask(ZBC) Var(riscv_zb_subext)
-
-Mask(ZBS) Var(riscv_zb_subext)
-
-TargetVariable
-int riscv_zinx_subext
-
-Mask(ZFINX) Var(riscv_zinx_subext)
-
-Mask(ZDINX) Var(riscv_zinx_subext)
-
-Mask(ZHINX) Var(riscv_zinx_subext)
-
-Mask(ZHINXMIN) Var(riscv_zinx_subext)
-
-TargetVariable
-int riscv_zk_subext
-
-Mask(ZBKB) Var(riscv_zk_subext)
-
-Mask(ZBKC) Var(riscv_zk_subext)
-
-Mask(ZBKX) Var(riscv_zk_subext)
-
-Mask(ZKNE) Var(riscv_zk_subext)
-
-Mask(ZKND) Var(riscv_zk_subext)
-
-Mask(ZKNH) Var(riscv_zk_subext)
-
-Mask(ZKR) Var(riscv_zk_subext)
-
-Mask(ZKSED) Var(riscv_zk_subext)
-
-Mask(ZKSH) Var(riscv_zk_subext)
-
-Mask(ZKT) Var(riscv_zk_subext)
-
-TargetVariable
int riscv_vector_elen_flags
Mask(VECTOR_ELEN_32) Var(riscv_vector_elen_flags)
@@ -335,207 +239,6 @@ Mask(VECTOR_ELEN_FP_16) Var(riscv_vector_elen_flags)
Mask(VECTOR_ELEN_BF_16) Var(riscv_vector_elen_flags)
TargetVariable
-int riscv_zvl_flags
-
-Mask(ZVL32B) Var(riscv_zvl_flags)
-
-Mask(ZVL64B) Var(riscv_zvl_flags)
-
-Mask(ZVL128B) Var(riscv_zvl_flags)
-
-Mask(ZVL256B) Var(riscv_zvl_flags)
-
-Mask(ZVL512B) Var(riscv_zvl_flags)
-
-Mask(ZVL1024B) Var(riscv_zvl_flags)
-
-Mask(ZVL2048B) Var(riscv_zvl_flags)
-
-Mask(ZVL4096B) Var(riscv_zvl_flags)
-
-Mask(ZVL8192B) Var(riscv_zvl_flags)
-
-Mask(ZVL16384B) Var(riscv_zvl_flags)
-
-Mask(ZVL32768B) Var(riscv_zvl_flags)
-
-Mask(ZVL65536B) Var(riscv_zvl_flags)
-
-TargetVariable
-int riscv_zvb_subext
-
-Mask(ZVBB) Var(riscv_zvb_subext)
-
-Mask(ZVBC) Var(riscv_zvb_subext)
-
-Mask(ZVKB) Var(riscv_zvb_subext)
-
-TargetVariable
-int riscv_zvk_subext
-
-Mask(ZVKG) Var(riscv_zvk_subext)
-
-Mask(ZVKNED) Var(riscv_zvk_subext)
-
-Mask(ZVKNHA) Var(riscv_zvk_subext)
-
-Mask(ZVKNHB) Var(riscv_zvk_subext)
-
-Mask(ZVKSED) Var(riscv_zvk_subext)
-
-Mask(ZVKSH) Var(riscv_zvk_subext)
-
-Mask(ZVKN) Var(riscv_zvk_subext)
-
-Mask(ZVKNC) Var(riscv_zvk_subext)
-
-Mask(ZVKNG) Var(riscv_zvk_subext)
-
-Mask(ZVKS) Var(riscv_zvk_subext)
-
-Mask(ZVKSC) Var(riscv_zvk_subext)
-
-Mask(ZVKSG) Var(riscv_zvk_subext)
-
-Mask(ZVKT) Var(riscv_zvk_subext)
-
-TargetVariable
-int riscv_zicmo_subext
-
-Mask(ZICBOZ) Var(riscv_zicmo_subext)
-
-Mask(ZICBOM) Var(riscv_zicmo_subext)
-
-Mask(ZICBOP) Var(riscv_zicmo_subext)
-
-Mask(ZIC64B) Var(riscv_zicmo_subext)
-
-TargetVariable
-int riscv_mop_subext
-
-Mask(ZIMOP) Var(riscv_mop_subext)
-
-Mask(ZCMOP) Var(riscv_mop_subext)
-
-TargetVariable
-int riscv_zf_subext
-
-Mask(ZFBFMIN) Var(riscv_zf_subext)
-
-Mask(ZFHMIN) Var(riscv_zf_subext)
-
-Mask(ZFH) Var(riscv_zf_subext)
-
-Mask(ZVFBFMIN) Var(riscv_zf_subext)
-
-Mask(ZVFBFWMA) Var(riscv_zf_subext)
-
-Mask(ZVFHMIN) Var(riscv_zf_subext)
-
-Mask(ZVFH) Var(riscv_zf_subext)
-
-TargetVariable
-int riscv_zfa_subext
-
-Mask(ZFA) Var(riscv_zfa_subext)
-
-TargetVariable
-int riscv_zm_subext
-
-Mask(ZMMUL) Var(riscv_zm_subext)
-
-TargetVariable
-int riscv_zc_subext
-
-Mask(ZCA) Var(riscv_zc_subext)
-
-Mask(ZCB) Var(riscv_zc_subext)
-
-Mask(ZCE) Var(riscv_zc_subext)
-
-Mask(ZCF) Var(riscv_zc_subext)
-
-Mask(ZCD) Var(riscv_zc_subext)
-
-Mask(ZCMP) Var(riscv_zc_subext)
-
-Mask(ZCMT) Var(riscv_zc_subext)
-
-Mask(XCVBI) Var(riscv_xcv_subext)
-
-TargetVariable
-int riscv_sv_subext
-
-Mask(SVINVAL) Var(riscv_sv_subext)
-
-Mask(SVNAPOT) Var(riscv_sv_subext)
-
-Mask(SVVPTC) Var(riscv_sv_subext)
-
-TargetVariable
-int riscv_ztso_subext
-
-Mask(ZTSO) Var(riscv_ztso_subext)
-
-TargetVariable
-int riscv_xcv_subext
-
-Mask(XCVMAC) Var(riscv_xcv_subext)
-
-Mask(XCVALU) Var(riscv_xcv_subext)
-
-Mask(XCVELW) Var(riscv_xcv_subext)
-
-Mask(XCVSIMD) Var(riscv_xcv_subext)
-
-TargetVariable
-int riscv_xthead_subext
-
-Mask(XTHEADBA) Var(riscv_xthead_subext)
-
-Mask(XTHEADBB) Var(riscv_xthead_subext)
-
-Mask(XTHEADBS) Var(riscv_xthead_subext)
-
-Mask(XTHEADCMO) Var(riscv_xthead_subext)
-
-Mask(XTHEADCONDMOV) Var(riscv_xthead_subext)
-
-Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext)
-
-Mask(XTHEADFMV) Var(riscv_xthead_subext)
-
-Mask(XTHEADINT) Var(riscv_xthead_subext)
-
-Mask(XTHEADMAC) Var(riscv_xthead_subext)
-
-Mask(XTHEADMEMIDX) Var(riscv_xthead_subext)
-
-Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext)
-
-Mask(XTHEADSYNC) Var(riscv_xthead_subext)
-
-Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
-
-TargetVariable
-int riscv_xventana_subext
-
-Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
-
-TargetVariable
-int riscv_sifive_subext
-
-Mask(XSFVCP) Var(riscv_sifive_subext)
-
-Mask(XSFCEASE) Var(riscv_sifive_subext)
-
-Mask(XSFVQMACCQOQ) Var(riscv_sifive_subext)
-
-Mask(XSFVQMACCDOD) Var(riscv_sifive_subext)
-
-Mask(XSFVFNRCLIPXFQF) Var(riscv_sifive_subext)
-
-TargetVariable
int riscv_fmv_priority = 0
Enum
@@ -579,6 +282,18 @@ Inline strlen calls if possible.
Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64)
Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64).
+-param=gpr2vr-cost=
+Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED)
+Set the cost value of the RVV instruction when operating from GPR to VR.
+
+-param=fpr2vr-cost=
+Target RejectNegative Joined UInteger Var(fpr2vr_cost) Init(FPR2VR_COST_UNPROVIDED)
+Set the cost value of the RVV instruction when operating from FPR to VR.
+
+-param=riscv-autovec-mode=
+Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save
+Set the only autovec mode to try.
+
Enum
Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum)
The RVV possible LMUL (-mrvv-max-lmul=):
diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.cc b/gcc/config/riscv/sifive-vector-builtins-bases.cc
index 85e1b6f..be530ca 100644
--- a/gcc/config/riscv/sifive-vector-builtins-bases.cc
+++ b/gcc/config/riscv/sifive-vector-builtins-bases.cc
@@ -195,12 +195,89 @@ public:
}
};
+/* Implements SiFive sf.vc. */
+class sf_vc : public function_base
+{
+public:
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_USE_COPROCESSORS;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ switch (e.op_info->op)
+ {
+ case OP_TYPE_x:
+ return e.use_exact_insn (code_for_sf_vc_x_se (e.vector_mode ()));
+ case OP_TYPE_i:
+ return e.use_exact_insn (code_for_sf_vc_i_se (e.vector_mode ()));
+ case OP_TYPE_vv:
+ return e.use_exact_insn (code_for_sf_vc_vv_se (e.vector_mode ()));
+ case OP_TYPE_xv:
+ return e.use_exact_insn (code_for_sf_vc_xv_se (e.vector_mode ()));
+ case OP_TYPE_iv:
+ return e.use_exact_insn (code_for_sf_vc_iv_se (e.vector_mode ()));
+ case OP_TYPE_fv:
+ return e.use_exact_insn (code_for_sf_vc_fv_se (e.vector_mode ()));
+ case OP_TYPE_v_x:
+ return e.use_exact_insn (code_for_sf_vc_v_x_se (e.vector_mode ()));
+ case OP_TYPE_v_i:
+ return e.use_exact_insn (code_for_sf_vc_v_i_se (e.vector_mode ()));
+ case OP_TYPE_v_vv:
+ return e.use_exact_insn (code_for_sf_vc_v_vv_se (e.vector_mode ()));
+ case OP_TYPE_v_xv:
+ return e.use_exact_insn (code_for_sf_vc_v_xv_se (e.vector_mode ()));
+ case OP_TYPE_v_iv:
+ return e.use_exact_insn (code_for_sf_vc_v_iv_se (e.vector_mode ()));
+ case OP_TYPE_v_fv:
+ return e.use_exact_insn (code_for_sf_vc_v_fv_se (e.vector_mode ()));
+ case OP_TYPE_vvv:
+ return e.use_exact_insn (code_for_sf_vc_vvv_se (e.vector_mode ()));
+ case OP_TYPE_xvv:
+ return e.use_exact_insn (code_for_sf_vc_xvv_se (e.vector_mode ()));
+ case OP_TYPE_ivv:
+ return e.use_exact_insn (code_for_sf_vc_ivv_se (e.vector_mode ()));
+ case OP_TYPE_fvv:
+ return e.use_exact_insn (code_for_sf_vc_fvv_se (e.vector_mode ()));
+ case OP_TYPE_vvw:
+ return e.use_exact_insn (code_for_sf_vc_vvw_se (e.vector_mode ()));
+ case OP_TYPE_xvw:
+ return e.use_exact_insn (code_for_sf_vc_xvw_se (e.vector_mode ()));
+ case OP_TYPE_ivw:
+ return e.use_exact_insn (code_for_sf_vc_ivw_se (e.vector_mode ()));
+ case OP_TYPE_fvw:
+ return e.use_exact_insn (code_for_sf_vc_fvw_se (e.vector_mode ()));
+ case OP_TYPE_v_vvv:
+ return e.use_exact_insn (code_for_sf_vc_v_vvv_se (e.vector_mode ()));
+ case OP_TYPE_v_xvv:
+ return e.use_exact_insn (code_for_sf_vc_v_xvv_se (e.vector_mode ()));
+ case OP_TYPE_v_ivv:
+ return e.use_exact_insn (code_for_sf_vc_v_ivv_se (e.vector_mode ()));
+ case OP_TYPE_v_fvv:
+ return e.use_exact_insn (code_for_sf_vc_v_fvv_se (e.vector_mode ()));
+ case OP_TYPE_v_vvw:
+ return e.use_exact_insn (code_for_sf_vc_v_vvw_se (e.vector_mode ()));
+ case OP_TYPE_v_xvw:
+ return e.use_exact_insn (code_for_sf_vc_v_xvw_se (e.vector_mode ()));
+ case OP_TYPE_v_ivw:
+ return e.use_exact_insn (code_for_sf_vc_v_ivw_se (e.vector_mode ()));
+ case OP_TYPE_v_fvw:
+ return e.use_exact_insn (code_for_sf_vc_v_fvw_se (e.vector_mode ()));
+ default:
+ gcc_unreachable ();
+ }
+ }
+};
+
static CONSTEXPR const sf_vqmacc sf_vqmacc_obj;
static CONSTEXPR const sf_vqmaccu sf_vqmaccu_obj;
static CONSTEXPR const sf_vqmaccsu sf_vqmaccsu_obj;
static CONSTEXPR const sf_vqmaccus sf_vqmaccus_obj;
static CONSTEXPR const sf_vfnrclip_x_f_qf<UNSPEC_SF_VFNRCLIP> sf_vfnrclip_x_f_qf_obj;
static CONSTEXPR const sf_vfnrclip_xu_f_qf<UNSPEC_SF_VFNRCLIPU> sf_vfnrclip_xu_f_qf_obj;
+static CONSTEXPR const sf_vc sf_vc_obj;
/* Declare the function base NAME, pointing it to an instance
of class <NAME>_obj. */
@@ -213,4 +290,5 @@ BASE (sf_vqmaccsu)
BASE (sf_vqmaccus)
BASE (sf_vfnrclip_x_f_qf)
BASE (sf_vfnrclip_xu_f_qf)
+BASE (sf_vc)
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.h b/gcc/config/riscv/sifive-vector-builtins-bases.h
index 69e5540..4ec1e30 100644
--- a/gcc/config/riscv/sifive-vector-builtins-bases.h
+++ b/gcc/config/riscv/sifive-vector-builtins-bases.h
@@ -23,6 +23,8 @@
namespace riscv_vector {
+static const unsigned int CP_USE_COPROCESSORS = 1U << 6;
+
namespace bases {
extern const function_base *const sf_vqmacc;
extern const function_base *const sf_vqmaccu;
@@ -30,6 +32,7 @@ extern const function_base *const sf_vqmaccsu;
extern const function_base *const sf_vqmaccus;
extern const function_base *const sf_vfnrclip_x_f_qf;
extern const function_base *const sf_vfnrclip_xu_f_qf;
+extern const function_base *const sf_vc;
}
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/sifive-vector-builtins-functions.def b/gcc/config/riscv/sifive-vector-builtins-functions.def
index e6621c7..f6703ae 100644
--- a/gcc/config/riscv/sifive-vector-builtins-functions.def
+++ b/gcc/config/riscv/sifive-vector-builtins-functions.def
@@ -55,4 +55,49 @@ DEF_RVV_FUNCTION (sf_vfnrclip_x_f_qf, sf_vfnrclip, full_preds, i_clip_qf_ops)
DEF_RVV_FUNCTION (sf_vfnrclip_xu_f_qf, sf_vfnrclip, full_preds, u_clip_qf_ops)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS XSFVCP_EXT
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvw_ops)
+#undef REQUIRED_EXTENSIONS
+
#undef DEF_RVV_FUNCTION
diff --git a/gcc/config/riscv/sifive-vector.md b/gcc/config/riscv/sifive-vector.md
index 2975b1e..a416634 100644
--- a/gcc/config/riscv/sifive-vector.md
+++ b/gcc/config/riscv/sifive-vector.md
@@ -182,3 +182,874 @@
"sf.vfnrclip.x<v_su>.f.qf\t%0,%3,%4%p1"
[(set_attr "type" "sf_vfnrclip")
(set_attr "mode" "<MODE>")])
+
+;; SF_VCP
+(define_insn "@sf_vc_x_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SI 3 "const_int_operand" "K")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.x\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_x_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.x\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_x<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.x\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_i_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SI 3 "const_int_operand" "K")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.i\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_i_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.i\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_i<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.i\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:VFULLI 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_iv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.iv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_iv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.iv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_iv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.iv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SF_FV 3 "register_operand" "vr")
+ (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fv_se<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fv<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:VFULLI 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvv\t%3,%4,%6,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvv\t%3,%4,%6,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_ivv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.ivv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:SF_FV 2 "register_operand" "vd")
+ (match_operand:SF_FV 3 "register_operand" "vr")
+ (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvv_se<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vd,vd")
+ (match_operand:SF_FV 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvv<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vd,vd")
+ (match_operand:SF_FV 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:SF_VC_W 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_ivw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:SI 4 "immediate_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.ivw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+	 (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "immediate_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "immediate_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_FW 3 "register_operand" "vr")
+ (match_operand:<SF_XFW> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_FW 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_FW 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 726800a..a75ea68 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -405,18 +405,17 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
- (match_operand:GPR 2 "register_operand" "0"))
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
(clobber (match_scratch:GPR 4 "=&r"))] ;; tmp_1
"!TARGET_ZAAMO && TARGET_ZALRSC"
{
return "1:\;"
- "lr.<amo>%I3\t%4, %1\;"
- "sc.<amo>%J3\t%0, %0, %1\;"
- "bnez\t%0, 1b\;"
- "mv\t%0, %4";
+ "lr.<amo>%I3\t%0, %1\;"
+ "sc.<amo>%J3\t%4, %z2, %1\;"
+ "bnez\t%4, 1b\";
}
[(set_attr "type" "atomic")
- (set (attr "length") (const_int 16))])
+ (set (attr "length") (const_int 12))])
(define_expand "atomic_exchange<mode>"
[(match_operand:SHORT 0 "register_operand") ;; old value at mem
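For reference, a minimal sketch of source that reaches this Zalrsc-only atomic_exchange pattern; the function name and the register choices in the comment are illustrative assumptions:

    #include <stdint.h>

    /* Built with Zalrsc but without Zaamo, this is expected to expand to
       the three-instruction loop from the revised pattern above:
         1: lr.w    a0, (a1)      # old value becomes the result
            sc.w    a5, a2, (a1)  # try to store the new value
            bnez    a5, 1b        # retry if the reservation was lost  */
    int32_t
    exchange (int32_t *p, int32_t v)
    {
      return __atomic_exchange_n (p, v, __ATOMIC_RELAXED);
    }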
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 6493087..32092d8 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -104,6 +104,13 @@ riscv-avlprop.o: $(srcdir)/config/riscv/riscv-avlprop.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-avlprop.cc
+riscv-vect-permconst.o: $(srcdir)/config/riscv/riscv-vect-permconst.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
+ $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-attr.h \
+ tm-constrs.h insn-opinit.h cfgrtl.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/riscv/riscv-vect-permconst.cc
+
riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
$(COMPILE) $<
@@ -180,3 +187,49 @@ s-riscv-vector-type-indexer.gen.defs: build/genrvv-type-indexer$(build_exeext)
$(STAMP) s-riscv-vector-type-indexer.gen.defs
genprog+=rvv-type-indexer
+
+RISCV_EXT_DEFS = \
+ $(srcdir)/config/riscv/riscv-ext.def \
+ $(srcdir)/config/riscv/riscv-ext-corev.def \
+ $(srcdir)/config/riscv/riscv-ext-sifive.def \
+ $(srcdir)/config/riscv/riscv-ext-thead.def \
+ $(srcdir)/config/riscv/riscv-ext-ventana.def
+
+$(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS)
+
+build/gen-riscv-ext-opt.o: $(srcdir)/config/riscv/gen-riscv-ext-opt.cc \
+ $(RISCV_EXT_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-ext-opt$(build_exeext): build/gen-riscv-ext-opt.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+s-riscv-ext.opt: build/gen-riscv-ext-opt$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-ext-opt$(build_exeext) > tmp-riscv-ext.opt
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.opt $(srcdir)/config/riscv/riscv-ext.opt
+ $(STAMP) s-riscv-ext.opt
+
+build/gen-riscv-ext-texi.o: $(srcdir)/config/riscv/gen-riscv-ext-texi.cc \
+ $(RISCV_EXT_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-ext-texi$(build_exeext): build/gen-riscv-ext-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-ext.texi: $(RISCV_EXT_DEFS)
+$(srcdir)/doc/riscv-ext.texi: s-riscv-ext.texi ; @true
+
+# Generate the doc when generating option file.
+$(srcdir)/config/riscv/riscv-ext.opt: s-riscv-ext.texi ; @true
+
+s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-ext-texi$(build_exeext) > tmp-riscv-ext.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi
+ $(STAMP) s-riscv-ext.texi
+
+# Run `riscv-regen' after changing or adding anything in riscv-ext*.def
+
+.PHONY: riscv-regen
+
+riscv-regen: s-riscv-ext.texi s-riscv-ext.opt
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index f8da71b..44ae79c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -120,10 +120,11 @@
UNSPEC_SF_VFNRCLIP
UNSPEC_SF_VFNRCLIPU
+ UNSPEC_SF_CV
])
(define_c_enum "unspecv" [
- UNSPECV_FRM_RESTORE_EXIT
+ UNSPECV_SF_CV
])
;; Subset of VI with fractional LMUL types
@@ -4040,6 +4041,14 @@
smax umax smin umin mult div udiv mod umod
])
+(define_code_iterator any_int_binop_no_shift_v_vdup [
+ plus minus and ior xor mult div udiv mod umod smax umax smin umin
+])
+
+(define_code_iterator any_int_binop_no_shift_vdup_v [
+ plus minus and ior xor mult smax umax smin umin
+])
+
(define_code_iterator any_int_unop [neg not])
(define_code_iterator any_commutative_binop [plus and ior xor
@@ -4873,3 +4882,50 @@
(RVVM1QI "rvvm4sf")
(RVVM2QI "rvvm8sf")
])
+
+
+(define_mode_iterator SF_VC_W [
+ RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
+ RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_attr SF_VW [
+ (RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI")
+ (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI")
+ (RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI")
+ (RVVMF4HI "RVVMF2SI")
+ (RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI")
+])
+
+(define_mode_attr sf_vw [
+ (RVVM4QI "rvvm8hi") (RVVM2QI "rvvm4hi") (RVVM1QI "rvvm2hi") (RVVMF2QI "rvvm1hi")
+ (RVVMF4QI "rvvmf2hi") (RVVMF8QI "rvvmf4hi")
+ (RVVM4HI "rvvm8si") (RVVM2HI "rvvm4si") (RVVM1HI "rvvm2si") (RVVMF2HI "rvvm1si")
+ (RVVMF4HI "rvvmf2si")
+ (RVVM4SI "rvvm8di") (RVVM2SI "rvvm4di") (RVVM1SI "rvvm2di") (RVVMF2SI "rvvm1di")
+])
+
+(define_mode_iterator SF_FV [
+ RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+ (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
+ (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
+])
+
+
+(define_mode_iterator SF_VC_FW [
+ RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_attr SF_XF [
+ (RVVM8HI "HF") (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF")
+ (RVVM8SI "SF") (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF")
+ (RVVM8DI "DF") (RVVM4DI "DF") (RVVM2DI "DF") (RVVM1DI "DF")
+])
+
+(define_mode_attr SF_XFW [
+ (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF")
+ (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF")
+])
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..851ba4a 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -56,8 +56,7 @@
vssegtux,vssegtox,vlsegdff,vandn,vbrev,vbrev8,vrev8,vcpop,vclz,vctz,vrol,\
vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,\
- vfncvtbf16,vfwcvtbf16,vfwmaccbf16,\
- sf_vqmacc,sf_vfnrclip")
+ vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vqmacc,sf_vfnrclip,sf_vc,sf_vc_se")
(const_string "true")]
(const_string "false")))
@@ -1116,19 +1115,6 @@
(set_attr "mode" "SI")]
)
-;; The volatile fsrmsi restore is used for the exit point for the
-;; dynamic mode switching. It will generate one volatile fsrm a5
-;; which won't be eliminated.
-(define_insn "fsrmsi_restore_volatile"
- [(set (reg:SI FRM_REGNUM)
- (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
- UNSPECV_FRM_RESTORE_EXIT))]
- "TARGET_VECTOR"
- "fsrm\t%0"
- [(set_attr "type" "wrfrm")
- (set_attr "mode" "SI")]
-)
-
;; Read FRM
(define_insn "frrmsi"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -2136,18 +2122,34 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
/* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
has better chances to do vsetvl fusion in vsetvl pass. */
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
if (riscv_vector::splat_to_scalar_move_p (operands))
{
operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
operands[3] = force_reg (<VEL>mode, operands[3]);
}
+ else if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
+ {
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
+ }
/* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
else if (satisfies_constraint_Wdm (operands[3]))
{
@@ -2191,6 +2193,8 @@
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
(define_insn_and_split "*pred_broadcast<mode>"
diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md
index f87b4f2..d170f6a 100644
--- a/gcc/config/riscv/zicond.md
+++ b/gcc/config/riscv/zicond.md
@@ -234,5 +234,39 @@
(const_int 0)
(match_dup 4)))])
+;; We can splat the sign bit across a GPR with an arithmetic right shift,
+;; which gives us a 0, -1 result. We then turn on bit #0 unconditionally,
+;; which results in 1, -1. There are probably other cases that could be
+;; handled; this one seems particularly important, though.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (plus:X (if_then_else:X (ge:X (match_operand:X 1 "register_operand")
+ (const_int 0))
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand"))
+ (match_operand 4 "const_int_operand")))]
+ "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV)
+ && INTVAL (operands[2]) + INTVAL (operands[4]) == 1
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == -1)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))]
+ { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); })
-
+;; Similarly, but with the condition and the true/false values reversed.
+;;
+;; Note that the case where only the condition is reversed, or only the
+;; true/false values, is not handled because we don't support 4->3
+;; splits.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (plus:X (if_then_else:X (lt:X (match_operand:X 1 "register_operand")
+ (const_int 0))
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand"))
+ (match_operand 4 "const_int_operand")))]
+ "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV)
+ && INTVAL (operands[2]) + INTVAL (operands[4]) == -1
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 1)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))]
+ { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); })
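
As a quick cross-check (not part of the patch): the scalar identity behind the two splits above, sketched in C. The helper names are illustrative only; 64-bit X registers and an arithmetic right shift on signed types are assumed.

#include <assert.h>
#include <stdint.h>

/* Form matched by the first split: (x >= 0 ? a : b) + c,
   with a + c == 1 and b + c == -1.  */
static int64_t
reference (int64_t x, int64_t a, int64_t b, int64_t c)
{
  return (x >= 0 ? a : b) + c;
}

/* Form emitted by the split: splat the sign bit across the register
   (0 or -1), then unconditionally set bit 0 (1 or -1).  */
static int64_t
split_form (int64_t x)
{
  return (x >> 63) | 1;
}

int
main (void)
{
  /* a = 5, b = 3, c = -4 satisfies a + c == 1 and b + c == -1.  */
  int64_t tests[] = { 0, 1, -1, 42, -42, INT64_MAX, INT64_MIN };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    assert (reference (tests[i], 5, 3, -4) == split_form (tests[i]));
  return 0;
}
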
diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc
index 09753b6..28728aa 100644
--- a/gcc/config/rl78/rl78.cc
+++ b/gcc/config/rl78/rl78.cc
@@ -4953,8 +4953,7 @@ rl78_emit_libcall (const char *name, enum rtx_code code,
gcc_unreachable ();
}
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, operands[0], ret, equiv);
return ret;
}
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 1118023..bc1580f 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -915,7 +915,7 @@ fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1,
tree cmp_type = truth_type_for (type);
tree zero_vec = build_zero_cst (type);
tree minus_one_vec = build_minus_one_cst (type);
- tree temp = create_tmp_reg_or_ssa_name (cmp_type);
+ tree temp = make_ssa_name (cmp_type);
gimple *g = gimple_build_assign (temp, code, arg0, arg1);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec);
@@ -1106,7 +1106,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
if (TREE_TYPE (src_ptr) != src_type)
src_ptr = build1 (NOP_EXPR, src_type, src_ptr);
- tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
+ tree src = make_ssa_name (TREE_TYPE (src_type));
gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
/* If we are not disassembling an accumulator/pair or our destination is
@@ -1130,7 +1130,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
{
new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
new_call = gimple_build_call (new_decl, 1, src);
- src = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ src = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, src);
gimple_seq_add_stmt (&new_seq, new_call);
}
@@ -1146,7 +1146,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
build_int_cst (dst_type, index * 16));
- tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
+ tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
new_call = gimple_build_call (new_decl, 2, src,
build_int_cstu (uint16_type_node, i));
gimple_call_set_lhs (new_call, dstssa);
@@ -1204,7 +1204,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
{
/* This built-in has a pass-by-reference accumulator input, so load it
into a temporary accumulator for use as a pass-by-value input. */
- op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ op[0] = make_ssa_name (vector_quad_type_node);
for (unsigned i = 1; i < nopnds; i++)
op[i] = gimple_call_arg (stmt, i);
gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
@@ -1252,9 +1252,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
}
if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V)
- lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node);
+ lhs = make_ssa_name (vector_pair_type_node);
else
- lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ lhs = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, lhs);
gimple_seq_add_stmt (&new_seq, new_call);
gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
@@ -1450,7 +1450,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1472,7 +1472,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1512,7 +1512,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1552,7 +1552,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1643,7 +1643,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 737c3d6..7ee26e5 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -55,7 +55,6 @@
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
-#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
@@ -9259,8 +9258,7 @@ rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
start_sequence ();
ret = rs6000_legitimize_address (x, oldx, mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
if (ret != x)
{
@@ -25314,7 +25312,6 @@ rs6000_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -25334,38 +25331,16 @@ rs6000_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
-
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- const tree decl2 = default_version_info->this_node->decl;
- if (is_function_default_version (decl2))
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
error_at (DECL_SOURCE_LOCATION (default_node->decl),
"%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
@@ -25765,10 +25740,13 @@ rs6000_can_inline_p (tree caller, tree callee)
}
}
- /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
- purposes. */
- callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
- explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+ /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options
+ for inlining purposes. */
+ HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION
+ | OPTION_MASK_P10_FUSION
+ | OPTION_MASK_SAVE_TOC_INDIRECT);
+ callee_isa &= ~ignored_isas;
+ explicit_isa &= ~ignored_isas;
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index edb2c96..a3d966e 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -2541,10 +2541,17 @@
(unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1
(match_operand:BLK 2 "memory_operand")] ;; String2
UNSPEC_CMPSTRN))
- (use (match_operand:SI 3 "register_operand")) ;; Max Length
+ (use (match_operand:SI 3 "nonmemory_operand")) ;; Max Length
(match_operand:SI 4 "immediate_operand")] ;; Known Align
"rx_allow_string_insns"
{
+ bool const_len = CONST_INT_P (operands[3]);
+ if (const_len && operands[3] == CONST0_RTX (SImode))
+ {
+ emit_move_insn (operands[0], CONST0_RTX (SImode));
+ DONE;
+ }
+
rtx str1 = gen_rtx_REG (SImode, 1);
rtx str2 = gen_rtx_REG (SImode, 2);
rtx len = gen_rtx_REG (SImode, 3);
@@ -2553,6 +2560,13 @@
emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
emit_move_insn (len, operands[3]);
 /* Set flags in case len is zero.  */
+ if (!const_len)
+ {
+ emit_insn (gen_setpsw (GEN_INT ('C')));
+ emit_insn (gen_setpsw (GEN_INT ('Z')));
+ }
+
emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
DONE;
}
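
For reference (not part of the patch): the new constant-zero fast path above matches strncmp-style semantics, where comparing zero characters always yields 0, so the expander can move 0 into the result and skip the scmpu sequence entirely. A tiny C illustration:

#include <assert.h>
#include <string.h>

int
main (void)
{
  /* strncmp with length 0 compares no characters and returns 0,
     which is what the const_len == 0 fast path emits directly.  */
  assert (strncmp ("abc", "xyz", 0) == 0);
  return 0;
}
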
@@ -2590,9 +2604,7 @@
(clobber (reg:SI 3))
(clobber (reg:CC CC_REG))]
"rx_allow_string_insns"
- "setpsw z ; Set flags in case len is zero
- setpsw c
- scmpu ; Perform the string comparison
+ "scmpu ; Perform the string comparison
mov #-1, %0 ; Set up -1 result (which cannot be created
; by the SC insn)
bnc ?+ ; If Carry is not set skip over
diff --git a/gcc/config/s390/9175.md b/gcc/config/s390/9175.md
new file mode 100644
index 0000000..d0ac0e1
--- /dev/null
+++ b/gcc/config/s390/9175.md
@@ -0,0 +1,316 @@
+;; Scheduling description for z17.
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_attr "z17_unit_fpd" ""
+ (cond [(eq_attr "mnemonic" "ddb,ddbr,deb,debr,dxbr,sqdb,sqdbr,sqeb,sqebr,\
+sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,vfddb,vfdsb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,\
+vrlq,vrq,wfddb,wfdsb,wfdxb,wfsqdb,wfsqxb")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxa" ""
+ (cond [(eq_attr "mnemonic" "a,afi,ag,agf,agfi,agfr,agh,aghi,aghik,agr,agrk,ah,\
+ahi,ahik,ahy,al,alc,alcg,alcgr,alcr,alfi,alg,algf,algfi,algfr,alghsik,algr,\
+algrk,alhsik,alr,alrk,aly,ar,ark,ay,bdepg,bextg,clzg,ctzg,etnd,flogr,ic,icm,\
+icmh,icmy,icy,iihf,iilf,ipm,la,larl,lay,lb,lbr,lcgfr,lcgr,lcr,lgb,lgbr,lgf,\
+lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,llcr,llgcr,llgfr,\
+llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,llxab,llxaf,llxag,llxah,\
+llxaq,lngfr,lngr,lnr,loc,locg,locghi,locgr,lochi,locr,lpgfr,lpgr,lpr,lr,lrv,\
+lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,lxab,lxaf,lxag,lxah,lxaq,m,mfy,\
+mg,mgh,mghi,mgrk,mh,mhi,mhy,ml,mlg,mlgr,mlr,mr,ms,msc,msfi,msg,msgc,msgf,msgfi,\
+msgfr,msgr,msgrkc,msr,msrkc,msy,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,\
+nilh,nill,nngrk,nnrk,nogrk,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,\
+oihf,oihh,oihl,oilf,oilh,oill,or,ork,oy,pfpo,popcnt,risbg,risbgn,rll,rllg,\
+rnsbg,rosbg,rxsbg,s,selgr,selr,sg,sgf,sgfr,sgh,sgr,sgrk,sh,shy,sl,slb,slbg,\
+slbgr,slbr,sldl,slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,\
+sly,sr,sra,srag,srak,srda,srdl,srk,srl,srlg,srlk,sy,x,xg,xgr,xgrk,xihf,xilf,xr,\
+xrk,xy")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxb" ""
+ (cond [(eq_attr "mnemonic" "agsi,algsi,alsi,asi,b,bc,bcr,bi,br,c,cfi,cg,cgf,\
+cgfi,cgfr,cgfrl,cgh,cghi,cghrl,cghsi,cgit,cgr,cgrl,cgrt,ch,chi,chrl,chsi,chy,\
+cit,cl,clfhsi,clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,\
+clgrl,clgrt,clgt,clhhsi,clhrl,cli,cliy,clm,clmy,clr,clrl,clrt,clt,cly,cr,crl,\
+crt,cy,laa,laag,lan,lang,lao,laog,lat,lax,laxg,lcdfr,ldgr,ldr,lgat,lgdr,lndfr,\
+lpdfr,lxr,lzdr,lzer,lzxr,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,nop,nopr,ntstg,oi,\
+oiy,ppa,st,stc,stcy,std,stdy,ste,stey,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,\
+strv,strvg,strvh,sty,tend,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlgvb,vlgvf,vlgvg,\
+vlgvh,vlr,vlvgb,vlvgf,vlvgg,vlvgh,vlvgp,vscef,vsceg,vst,vstbrf,vstbrg,vstbrh,\
+vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,vstrlr,xi,\
+xiy")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_fxd" ""
+ (cond [(eq_attr "mnemonic" "dlgr,dlr,dr,dsgfr,dsgr")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_lsu" ""
+ (cond [(eq_attr "mnemonic" "clc,ear,l,lam,lcbb,ld,lde,ldy,lg,lgrl,llc,llgc,\
+llgf,llgfrl,llgh,llghrl,llgt,llh,llhrl,lm,lmg,lmy,lpq,lrl,ly,mvcrl,sar,sfpc,\
+tabort,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,vlbrrepf,vlbrrepg,vlbrreph,vlerf,vlerg,\
+vlerh,vll,vllebrzf,vllebrzg,vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,\
+vlrepb,vlrepf,vlrepg,vlreph,vlrl,vlrlr")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_unit_vfu" ""
+ (cond [(eq_attr "mnemonic" "adb,adbr,adtr,aeb,aebr,axbr,axtr,brcl,cdb,cdbr,\
+cdtr,ceb,cebr,cpsdr,cxbr,cxtr,ddtr,dxtr,fidbr,fidbra,fidtr,fiebr,fiebra,fixbr,\
+fixbra,fixtr,j,jg,kdb,kdbr,kdtr,keb,kebr,kxbr,kxtr,lcdbr,lcebr,lcxbr,ldeb,\
+ldebr,ldetr,le,ledbr,ledtr,ler,ley,lndbr,lnebr,lnxbr,lpdbr,lpebr,lpxbr,ltdbr,\
+ltdtr,ltebr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,madb,madbr,maeb,maebr,mdb,\
+mdbr,mdtr,meeb,meebr,msdb,msdbr,mseb,msebr,mxbr,mxtr,sdb,sdbr,sdtr,seb,sebr,\
+sxbr,sxtr,tcdb,tceb,tcxb,tdcdt,tdcet,tdcxt,vab,vaccb,vacccq,vaccf,vaccg,vacch,\
+vaccq,vacq,vaf,vag,vah,vaq,vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,\
+vavglq,vavgq,vblendb,vblendf,vblendg,vblendh,vblendq,vbperm,vcdgb,vcdlgb,vcefb,\
+vcelfb,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,vceqh,vceqhs,vceqq,vceqqs,vcfeb,\
+vcfn,vcgdb,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,vchlb,vchlbs,vchlf,\
+vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,vcksm,vclfeb,vclfnh,\
+vclfnl,vclgdb,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\
+vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\
+verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\
+vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\
+vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfadb,vfasb,vfcedb,vfcedbs,vfcesb,\
+vfcesbs,vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfeeb,\
+vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,vfenezb,vfenezf,\
+vfenezh,vfidb,vfisb,vfkedb,vfkesb,vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,\
+vflndb,vflnsb,vflpdb,vflpsb,vfmadb,vfmasb,vfmaxdb,vfmaxsb,vfmdb,vfmindb,\
+vfminsb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vfsdb,vfssb,\
+vftcidb,vftcisb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgfmab,vgfmaf,vgfmag,vgfmah,\
+vgfmb,vgfmf,vgfmg,vgfmh,vgm,vistrb,vistrbs,vistrf,vistrfs,vistrh,vistrhs,vlcb,\
+vlcf,vlcg,vlch,vldeb,vleb,vlebrf,vlebrg,vlebrh,vledb,vlef,vleg,vleh,vleib,\
+vleif,vleig,vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,\
+vmahg,vmahh,vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,\
+vmalhg,vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,\
+vmaog,vmaoh,vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,\
+vmleg,vmleh,vmlf,vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,\
+vmloh,vmlq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,vmnlq,vmnq,vmob,vmof,\
+vmog,vmoh,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmslg,vmxb,vmxf,vmxg,\
+vmxh,vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,\
+vpdi,vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,\
+vpksfs,vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,\
+vrepf,vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,\
+vscbif,vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,\
+vsldb,vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,\
+vtm,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,vuplhb,vuplhf,vuplhg,vuplhh,\
+vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,\
+wcgdb,wclfeb,wclgdb,wfadb,wfasb,wfaxb,wfcdb,wfcedb,wfcesb,wfcexb,wfcexbs,\
+wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,wfchxbs,wfcsb,wfcxb,\
+wfidb,wfisb,wfixb,wfkdb,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,\
+wfkhsb,wfkhxb,wfksb,wfkxb,wflcdb,wflcsb,wflcxb,wflld,wflndb,wflnsb,wflnxb,\
+wflpdb,wflpsb,wflpxb,wflrx,wfmadb,wfmasb,wfmaxb,wfmaxxb,wfmdb,wfminxb,wfmsb,\
+wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsdb,wfssb,wfsxb,wftcixb,wldeb,\
+wledb")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_cracked" ""
+ (cond [(eq_attr "mnemonic" "bas,basr,bras,brasl,cdfbr,cdftr,cdgbr,cdgtr,\
+cdlfbr,cdlftr,cdlgbr,cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,\
+cgdtr,cgebr,cgxbr,cgxtr,chhsi,clfdbr,clfdtr,clfebr,clfxbr,clfxtr,clgdbr,clgdtr,\
+clgebr,clgxbr,clgxtr,cs,csg,csy,efpc,ex,exrl,lcgfr,lngfr,lpgfr,lpq,lxr,lzxr,\
+mvc,nc,oc,rnsbg,rosbg,rxsbg,stpq,vgef,vgeg,vscef,vsceg,vsteb,vstebrh,vsteh,xc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_expanded" ""
+ (cond [(eq_attr "mnemonic" "cds,cdsg,cdsy,cxfbr,cxftr,cxgbr,cxgtr,cxlfbr,\
+cxlftr,cxlgbr,cxlgtr,d,dl,dlg,dsg,dsgf,lam,lm,lmg,lmy,sldl,srda,srdl,stam,stm,\
+stmg,stmy,tbegin,tbeginc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_groupalone" ""
+ (cond [(eq_attr "mnemonic" "alc,alcg,alcgr,alcr,axbr,axtr,clc,cxbr,cxfbr,\
+cxftr,cxgbr,cxgtr,cxlfbr,cxlftr,cxlgbr,cxlgtr,cxtr,d,dl,dlg,dlgr,dlr,dr,dsg,\
+dsgf,dsgfr,dsgr,dxbr,dxtr,ex,exrl,fixbr,fixbra,fixtr,flogr,kxbr,kxtr,lcxbr,\
+lnxbr,lpxbr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,m,madb,maeb,maebr,mfy,mg,\
+mgrk,ml,mlg,mlgr,mlr,mr,msdb,mseb,msebr,mvc,mvcrl,mxbr,mxtr,nc,oc,ppa,sfpc,slb,\
+slbg,slbgr,slbr,sqxbr,sxbr,sxtr,tabort,tbegin,tbeginc,tcxb,tdcxt,tend,xc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_endgroup" ""
+ (cond [(eq_attr "mnemonic" "bas,basr,bcr,br,bras,brasl,cdsg,clfebr,cs,csg,csy,\
+efpc,ex,exrl,ipm,lam,lpq,lxr,nopr,sldl,srda,srdl,stam,stm,stmg,stmy,tbegin,\
+tbeginc")
+ (const_int 1)] (const_int 0)))
+
+(define_attr "z17_groupoftwo" ""
+ (cond [(eq_attr "mnemonic" "cdfbr,cdftr,cdgbr,cdgtr,cdlfbr,cdlftr,cdlgbr,\
+cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,cgdtr,cgebr,cgxbr,\
+cgxtr,chhsi,clfdbr,clfdtr,clfxbr,clfxtr,clgdbr,clgdtr,clgebr,clgxbr,clgxtr,\
+lcgfr,lngfr,lpgfr,lzxr,vacccq,vacq,vblendb,vblendf,vblendg,vblendh,vblendq,\
+veval,vfmadb,vfmasb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vgef,vgeg,\
+vgfmab,vgfmaf,vgfmag,vgfmah,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\
+vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\
+vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\
+vmslg,vperm,vsbcbiq,vsbiq,vscef,vsceg,vsel,vsteb,vstebrh,vsteh,wfmadb,wfmasb,\
+wfmaxb,wfmsdb,wfmssb,wfmsxb,wfnmaxb,wfnmsxb")
+ (const_int 1)] (const_int 0)))
+
+(define_insn_reservation "z17_0" 0
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "a,afi,ag,agfi,aghi,aghik,agr,agrk,ahi,ahik,al,alfi,alg,\
+algf,algfi,algfr,alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,b,bc,bcr,bi,\
+br,brcl,c,cfi,cg,cgfi,cghi,cghsi,cgit,cgr,cgrl,cgrt,chi,chsi,cit,cl,clfhsi,\
+clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,clghrl,clghsi,clgit,clgr,clgrl,clgrt,\
+clgt,clhhsi,clhrl,cli,cliy,clr,clrl,clrt,clt,cly,cr,crl,crt,cy,etnd,ic,icm,\
+icmh,icmy,icy,iihf,iilf,j,jg,la,larl,lat,lay,lb,lbr,lcdfr,lcgr,lcr,ldgr,ldr,\
+lgat,lgb,lgbr,lgf,lgfi,lgfr,lgfrl,lgh,lghi,lghr,lghrl,lgr,lh,lhi,lhr,lhrl,lhy,\
+llcr,llgcr,llgfr,llghr,llgtr,llhr,llihf,llihh,llihl,llilf,llilh,llill,lndfr,\
+lngr,lnr,lpdfr,lpgr,lpr,lr,lrv,lrvg,lrvgr,lrvh,lrvr,lt,ltg,ltgf,ltgfr,ltgr,ltr,\
+lzdr,lzer,n,ncgrk,ncrk,ng,ngr,ngrk,nihf,nihh,nihl,nilf,nilh,nill,nngrk,nnrk,\
+nogrk,nop,nopr,nork,nr,nrk,nxgrk,nxrk,ny,o,ocgrk,ocrk,og,ogr,ogrk,oihf,oihh,\
+oihl,oilf,oilh,oill,or,ork,oy,pfpo,risbg,risbgn,rll,rllg,s,sg,sgr,sgrk,sl,sldl,\
+slfi,slg,slgf,slgfi,slgfr,slgr,slgrk,sll,sllg,sllk,slr,slrk,sly,sr,sra,srag,\
+srak,srda,srdl,srk,srl,srlg,srlk,sy,tm,tmh,tmhh,tmhl,tml,tmlh,tmll,tmy,vlr,\
+vlvgb,vlvgf,vlvgg,vlvgh,x,xg,xgr,xgrk,xihf,xilf,xr,xrk,xy")) "nothing")
+
+(define_insn_reservation "z17_1" 1
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "agf,agfr,agh,agsi,ah,ahy,algsi,alsi,asi,cgf,cgfr,cgfrl,\
+cgh,cghrl,ch,chrl,chy,clm,clmy,cpsdr,laa,laag,lan,lang,lao,laog,lax,laxg,le,\
+ler,ley,llxab,llxaf,llxag,llxah,llxaq,loc,locg,locghi,locgr,lochi,locr,lxab,\
+lxaf,lxag,lxah,lxaq,mvghi,mvhhi,mvhi,mvi,mviy,ni,niy,ntstg,oi,oiy,selgr,selr,\
+sgf,sgfr,sgh,sh,shy,st,stc,stcy,stg,stgrl,sth,sthrl,sthy,stoc,stocg,strl,strv,\
+strvg,strvh,sty,vab,vaccb,vacccq,vaccf,vaccg,vacch,vaccq,vacq,vaf,vag,vah,vaq,\
+vavgb,vavgf,vavgg,vavgh,vavglb,vavglf,vavglg,vavglh,vavglq,vavgq,vblendb,\
+vblendf,vblendg,vblendh,vblendq,vbperm,vceqb,vceqbs,vceqf,vceqfs,vceqg,vceqgs,\
+vceqh,vceqhs,vceqq,vceqqs,vcfn,vchb,vchbs,vchf,vchfs,vchg,vchgs,vchh,vchhs,\
+vchlb,vchlbs,vchlf,vchlfs,vchlg,vchlgs,vchlh,vchlhs,vchlq,vchlqs,vchq,vchqs,\
+vclfnh,vclfnl,vclzb,vclzf,vclzg,vclzh,vclzq,vcnf,vcrnf,vctzb,vctzf,vctzg,vctzh,\
+vctzq,verimb,verimf,verimg,verimh,verllb,verllf,verllg,verllh,verllvb,verllvf,\
+verllvg,verllvh,veslb,veslf,veslg,veslh,veslvb,veslvf,veslvg,veslvh,vesrab,\
+vesraf,vesrag,vesrah,vesravb,vesravf,vesravg,vesravh,vesrlb,vesrlf,vesrlg,\
+vesrlh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,veval,vfcedb,vfcedbs,vfcesb,vfcesbs,\
+vfchdb,vfchdbs,vfchedb,vfchedbs,vfchesb,vfchesbs,vfchsb,vfchsbs,vfkedb,vfkesb,\
+vfkhdb,vfkhedb,vfkhesb,vfkhsb,vflcdb,vflcsb,vflndb,vflnsb,vflpdb,vflpsb,\
+vfmaxdb,vfmaxsb,vfmindb,vfminsb,vgbm,vgemb,vgemf,vgemg,vgemh,vgemq,vgm,vlcb,\
+vlcf,vlcg,vlch,vleb,vlebrf,vlebrg,vlebrh,vlef,vleg,vleh,vleib,vleif,vleig,\
+vleih,vlpb,vlpf,vlpg,vlph,vlpq,vmnb,vmnf,vmng,vmnh,vmnlb,vmnlf,vmnlg,vmnlh,\
+vmnlq,vmnq,vmrhb,vmrhf,vmrhg,vmrhh,vmrlb,vmrlf,vmrlg,vmrlh,vmxb,vmxf,vmxg,vmxh,\
+vmxlb,vmxlf,vmxlg,vmxlh,vmxlq,vmxq,vn,vnc,vnn,vno,vnot,vnx,vo,voc,vone,vpdi,\
+vperm,vpkf,vpkg,vpkh,vpklsf,vpklsfs,vpklsg,vpklsgs,vpklsh,vpklshs,vpksf,vpksfs,\
+vpksg,vpksgs,vpksh,vpkshs,vpopct,vpopctb,vpopctf,vpopctg,vpopcth,vrepb,vrepf,\
+vrepg,vreph,vrepi,vrepib,vrepif,vrepig,vrepih,vsb,vsbcbiq,vsbiq,vscbib,vscbif,\
+vscbig,vscbih,vscbiq,vsegb,vsegf,vsegh,vsel,vsf,vsg,vsh,vsl,vslb,vsld,vsldb,\
+vsq,vsra,vsrab,vsrd,vsrl,vsrlb,vuphb,vuphf,vuphg,vuphh,vuplb,vuplf,vuplg,\
+vuplhb,vuplhf,vuplhg,vuplhh,vuplhw,vupllb,vupllf,vupllg,vupllh,vx,vzero,wfcedb,\
+wfcesb,wfcexb,wfcexbs,wfchdb,wfchedb,wfchesb,wfchexb,wfchexbs,wfchsb,wfchxb,\
+wfchxbs,wfkedb,wfkesb,wfkexb,wfkhdb,wfkhedb,wfkhesb,wfkhexb,wfkhsb,wfkhxb,\
+wflcdb,wflcsb,wflcxb,wflndb,wflnsb,wflnxb,wflpdb,wflpsb,wflpxb,wfmaxxb,wfminxb,\
+xi,xiy")) "nothing")
+
+(define_insn_reservation "z17_2" 2
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdb,cdbr,ceb,cebr,clzg,ctzg,ear,ipm,kdb,kdbr,keb,kebr,l,\
+lcbb,lcdbr,lcebr,ld,lde,ldy,lg,lgdr,lgrl,llc,llgc,llgf,llgfrl,llgh,llghrl,llgt,\
+llh,llhrl,lm,lmg,lmy,lndbr,lnebr,lpdbr,lpebr,lrl,ltdbr,ltebr,ly,popcnt,sar,\
+tcdb,tceb,vfeeb,vfeef,vfeeh,vfeezbs,vfeezfs,vfeezhs,vfeneb,vfenef,vfeneh,\
+vfenezb,vfenezf,vfenezh,vftcidb,vftcisb,vistrb,vistrbs,vistrf,vistrfs,vistrh,\
+vistrhs,vlbrrepf,vlbrrepg,vlbrreph,vlgvb,vlgvf,vlgvg,vlgvh,vllebrzf,vllebrzg,\
+vllebrzh,vllezb,vllezf,vllezg,vllezh,vllezlf,vlrepb,vlrepf,vlrepg,vlreph,vlrl,\
+vlvgp,wfcdb,wfcsb,wfcxb,wfkdb,wfksb,wfkxb,wftcixb")) "nothing")
+
+(define_insn_reservation "z17_3" 3
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "bdepg,bextg,cds,cdsy,mgh,mghi,mh,mhi,mhy,ms,msc,msfi,msg,\
+msgc,msgf,msgfi,msgfr,msgr,msgrkc,msr,msrkc,msy,std,stdy,ste,stey,vcksm,vgfmab,\
+vgfmaf,vgfmag,vgfmah,vgfmb,vgfmf,vgfmg,vgfmh,vl,vlbb,vlbrf,vlbrg,vlbrh,vlbrq,\
+vlerf,vlerg,vlerh,vll,vlrlr,vmaeb,vmaef,vmaeg,vmaeh,vmahb,vmahf,vmahg,vmahh,\
+vmahq,vmalb,vmaleb,vmalef,vmaleg,vmaleh,vmalf,vmalg,vmalhb,vmalhf,vmalhg,\
+vmalhh,vmalhq,vmalhw,vmalob,vmalof,vmalog,vmaloh,vmalq,vmaob,vmaof,vmaog,vmaoh,\
+vmeb,vmef,vmeg,vmeh,vmhb,vmhf,vmhg,vmhh,vmhq,vmlb,vmleb,vmlef,vmleg,vmleh,vmlf,\
+vmlg,vmlhb,vmlhf,vmlhg,vmlhh,vmlhq,vmlhw,vmlob,vmlof,vmlog,vmloh,vmlq,vmob,\
+vmof,vmog,vmoh,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,vtm")) "nothing")
+
+(define_insn_reservation "z17_4" 4
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "bas,basr,bras,brasl,chhsi,clc,ex,exrl,lam,lcgfr,lngfr,\
+lpgfr,lxr,lzxr,mvcrl,ppa,rnsbg,rosbg,rxsbg,tabort,tend,vst,vstbrf,vstbrg,\
+vstbrh,vstbrq,vstebrf,vstebrg,vstef,vsteg,vsterf,vsterg,vsterh,vstl,vstrl,\
+vstrlr")) "nothing")
+
+(define_insn_reservation "z17_5" 5
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "adb,adbr,aeb,aebr,alc,alcg,alcgr,alcr,cs,csg,csy,fidbr,\
+fidbra,fiebr,fiebra,ldeb,ldebr,ledbr,madbr,mdb,mdbr,meeb,meebr,msdbr,sdb,sdbr,\
+seb,sebr,slb,slbg,slbgr,slbr,stm,stmg,stmy,vcdgb,vcdlgb,vcefb,vcelfb,vcfeb,\
+vcgdb,vclfeb,vclgdb,vldeb,vledb,vmslg,wcdgb,wcdlgb,wcefb,wcelfb,wcfeb,wcgdb,\
+wclfeb,wclgdb,wflld,wflrx,wldeb,wledb")) "nothing")
+
+(define_insn_reservation "z17_6" 6
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "sfpc")) "nothing")
+
+(define_insn_reservation "z17_7" 7
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "adtr,cdtr,fidtr,kdtr,ldetr,ltdtr,sdtr,tdcdt,tdcet,vfadb,\
+vfasb,vfidb,vfisb,vfsdb,vfssb,vgef,vgeg,wfadb,wfasb,wfaxb,wfidb,wfisb,wfixb,\
+wfsdb,wfssb,wfsxb")) "nothing")
+
+(define_insn_reservation "z17_8" 8
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdgtr,cdlgtr,cdsg,cxgtr,cxlgtr,flogr,lpq,m,mfy,mg,mgrk,\
+ml,mlg,mlgr,mlr,mr,stpq,vsteb,vstebrh,vsteh")) "nothing")
+
+(define_insn_reservation "z17_9" 9
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdfbr,cdgbr,cdlfbr,cdlgbr,cefbr,cegbr,celfbr,celgbr,madb,\
+maeb,maebr,msdb,mseb,msebr,stam")) "nothing")
+
+(define_insn_reservation "z17_10" 10
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cgdtr,cgxtr,clfdtr,clfxtr,clgdtr,clgxtr,d,dl,dlg,dsg,\
+dsgf,efpc,lxdb,lxdbr,lxeb,lxebr,vscef,vsceg")) "nothing")
+
+(define_insn_reservation "z17_11" 11
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cfdbr,cfebr,cgdbr,cgebr,clfdbr,clfebr,clgdbr,clgebr")) "nothing")
+
+(define_insn_reservation "z17_12" 12
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cxbr,cxtr,kxbr,kxtr,tbegin,tbeginc,tcxb,tdcxt")) "nothing")
+
+(define_insn_reservation "z17_13" 13
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "axbr,axtr,cxfbr,cxgbr,cxlfbr,cxlgbr,fixbr,fixbra,fixtr,\
+lcxbr,lnxbr,lpxbr,ltxbr,ltxtr,lxdtr,sxbr,sxtr")) "nothing")
+
+(define_insn_reservation "z17_14" 14
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cfxbr,cgxbr,clfxbr,clgxbr,ledtr")) "nothing")
+
+(define_insn_reservation "z17_15" 15
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "nc,oc")) "nothing")
+
+(define_insn_reservation "z17_16" 16
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "cdftr,cdlftr,cxftr,cxlftr")) "nothing")
+
+(define_insn_reservation "z17_18" 18
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "xc")) "nothing")
+
+(define_insn_reservation "z17_20" 20
+ (and (eq_attr "cpu" "z17")
+ (eq_attr "mnemonic" "ddb,ddbr,ddtr,deb,debr,dlgr,dlr,dr,dsgfr,dsgr,dxbr,dxtr,\
+mdtr,mvc,mxbr,mxtr,sqdb,sqdbr,sqeb,sqebr,sqxbr,vdf,vdg,vdlf,vdlg,vdlq,vdq,\
+vfddb,vfdsb,vfmadb,vfmasb,vfmdb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,\
+vfnmssb,vfsqdb,vfsqsb,vrf,vrg,vrlf,vrlg,vrlq,vrq,wfddb,wfdsb,wfdxb,wfmadb,\
+wfmasb,wfmaxb,wfmdb,wfmsb,wfmsdb,wfmssb,wfmsxb,wfmxb,wfnmaxb,wfnmsxb,wfsqdb,\
+wfsqxb")) "nothing")
+
diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 49e8fa0..7a7ceea 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -127,6 +127,10 @@ s390_host_detect_local_cpu (int argc, const char **argv)
case 0x3932:
cpu = "arch14";
break;
+ case 0x9175:
+ case 0x9176:
+ cpu = "arch15";
+ break;
default:
cpu = "arch15";
break;
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index d9af9b1..cee2326 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -300,8 +300,8 @@
#define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */
#define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */
#define B_NNPA (1 << 6) /* Builtins requiring the NNPA Facility. */
-#define B_VXE3 (1 << 7) /* Builtins requiring the arch15 vector extensions. */
-#define B_ARCH15 (1 << 8) /* Builtins requiring arch15. */
+#define B_VXE3 (1 << 7) /* Builtins requiring the z17 vector extensions. */
+#define B_Z17 (1 << 8) /* Builtins requiring z17. */
/* B_DEF defines a standard (not overloaded) builtin
B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>)
@@ -3318,8 +3318,8 @@ B_DEF (s390_vcnf, vcnf_v8hi, 0,
/* arch 15 builtins */
-B_DEF (s390_bdepg, bdepg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG)
-B_DEF (s390_bextg, bextg, 0, B_ARCH15, 0, BT_FN_ULONG_ULONG_ULONG)
+B_DEF (s390_bdepg, bdepg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG)
+B_DEF (s390_bextg, bextg, 0, B_Z17, 0, BT_FN_ULONG_ULONG_ULONG)
OB_DEF (s390_vec_blend, s390_vec_blend_s8, s390_vec_blend_dbl, B_VXE3, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_blend_s8, s390_vblendb, 0, 0, BT_OV_V16QI_V16QI_V16QI_V16QI)
diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 311d74a..a01c44c 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -962,7 +962,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl,
if (!TARGET_VXE3 && (ob_flags & B_VXE3))
{
- error_at (loc, "%qF requires arch15 or higher", ob_fndecl);
+ error_at (loc, "%qF requires z17 or higher", ob_fndecl);
return error_mark_node;
}
@@ -1056,7 +1056,7 @@ s390_resolve_overloaded_builtin (location_t loc, tree ob_fndecl,
if (!TARGET_VXE3
&& bflags_overloaded_builtin_var[last_match_index] & B_VXE3)
{
- error_at (loc, "%qs matching variant requires arch15 or higher",
+ error_at (loc, "%qs matching variant requires z17 or higher",
IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)));
return error_mark_node;
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index 437d3b9..9cacb2c 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -39,7 +39,7 @@ enum processor_type
PROCESSOR_3906_Z14,
PROCESSOR_8561_Z15,
PROCESSOR_3931_Z16,
- PROCESSOR_ARCH15,
+ PROCESSOR_9175_Z17,
PROCESSOR_NATIVE,
PROCESSOR_max
};
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e8c7f83..d760a7e 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
extern void s390_expand_vec_movstr (rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 0ff3fd5..3826720 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -342,7 +342,7 @@ const struct s390_processor processor_table[] =
{ "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
{ "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
{ "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 },
- { "arch15", "arch15", PROCESSOR_ARCH15, &zEC12_cost, 15 },
+ { "z17", "arch15", PROCESSOR_9175_Z17, &zEC12_cost, 15 },
{ "native", "", PROCESSOR_NATIVE, NULL, 0 }
};
@@ -916,7 +916,7 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if ((bflags & B_VXE3) && !TARGET_VXE3)
{
- error ("Builtin %qF requires arch15 or higher", fndecl);
+ error ("Builtin %qF requires z17 or higher", fndecl);
return const0_rtx;
}
}
@@ -5589,8 +5589,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -5612,8 +5611,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -7210,6 +7208,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Expand optab cstoreti4. */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+ rtx_code code = GET_CODE (cmp);
+
+ if (TARGET_VXE3)
+ {
+ rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+ return;
+ }
+
+ /* Prior to VXE3, emulate the comparison. For an (in)equality test, exploit
+ VECTOR COMPARE EQUAL. For a relational test, first compare the high parts
+ via VECTOR ELEMENT COMPARE (LOGICAL). If the high parts are not equal,
+ then consume the CC immediately by a subsequent LOAD ON CONDITION.
+ Otherwise, if the high parts are equal, perform a subsequent VECTOR
+ COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */
+
+ op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+ if (code == EQ || code == NE)
+ {
+ s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+ return;
+ }
+
+ /* Normalize code into either GE(U) or GT(U). */
+ if (code == LT || code == LE || code == LTU || code == LEU)
+ {
+ std::swap (op1, op2);
+ code = swap_condition (code);
+ }
+
+ /* For (un)signed comparisons
+ - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+ if the relation does _not_ hold.
+ - high(op1) > high(op2) instruction VECG op2, op1 sets CC1
+ if the relation holds. */
+ if (code == GT || code == GTU)
+ std::swap (op1, op2);
+ machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (
+ gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+ rtx lab = gen_label_rtx ();
+ s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+ /* At this point we have that high(op1) == high(op2). Thus, test the low
+ part now. For unsigned comparisons
+ - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+ if the relation does _not_ hold.
+ - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1
+ if the relation holds. */
+ std::swap (op1, op2);
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op1, op2)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ /* For an (un)signed comparison >=, any CC except CC1 means that the
+ relation holds. For an (un)signed comparison >, only CC1 means that the
+ relation holds. */
+ rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT;
+ rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+}
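
As a plausibility sketch (not part of the patch): the relational fallback above follows the usual decomposition of a 128-bit comparison into 64-bit halves, i.e. op1 >= op2 holds iff high(op1) > high(op2), or the high parts are equal and low(op1) >= low(op2). A small C illustration of the unsigned case, with hypothetical helper names:

#include <assert.h>
#include <stdint.h>

struct u128 { uint64_t hi, lo; };

/* Unsigned 128-bit >= via 64-bit halves: the high-part test mirrors the
   VECG + LOAD ON CONDITION path, the low-part test mirrors the
   VCHLGS + LOAD ON CONDITION path taken when the high parts match.  */
static int
uge128 (struct u128 a, struct u128 b)
{
  if (a.hi != b.hi)
    return a.hi > b.hi;
  return a.lo >= b.lo;
}

int
main (void)
{
  struct u128 x = { 1, 0 }, y = { 0, UINT64_MAX };
  assert (uge128 (x, y) == 1);
  assert (uge128 (y, x) == 0);
  assert (uge128 (x, x) == 1);
  return 0;
}
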
+
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
the result in TARGET. */
@@ -7310,9 +7384,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
elements in CMP1 and CMP2 fulfill the comparison.
- This function is only used to emit patterns for the vx builtins and
- therefore only handles comparison codes required by the
- builtins. */
+ This function is only used in s390_expand_cstoreti4 and to emit patterns for
+ the vx builtins and therefore only handles comparison codes required by
+ those. */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
rtx cmp1, rtx cmp2, bool all_p)
@@ -7793,8 +7867,7 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
const0_rtx, ins))
{
*seq1 = NULL;
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
end_sequence ();
@@ -7803,13 +7876,11 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
/* Failed to use insv. Generate a two part shift and mask. */
start_sequence ();
tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
- *seq1 = get_insns ();
- end_sequence ();
+ *seq1 = end_sequence ();
start_sequence ();
tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
@@ -9204,7 +9275,7 @@ s390_issue_rate (void)
case PROCESSOR_3906_Z14:
case PROCESSOR_8561_Z15:
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
+ case PROCESSOR_9175_Z17:
default:
return 1;
}
@@ -11735,8 +11806,7 @@ s390_load_got (void)
emit_move_insn (got_rtx, s390_got_symbol ());
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
@@ -13503,8 +13573,7 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -14496,7 +14565,21 @@ s390_call_saved_register_used (tree call_expr)
for (reg = 0; reg < nregs; reg++)
if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
- return true;
+ {
+ rtx parm;
+ /* Allow passing through unmodified value from caller,
+ see PR119873. */
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && REG_P (parm)
+ && REGNO (parm) == REGNO (parm_rtx)
+ && REG_NREGS (parm) == REG_NREGS (parm_rtx))
+ break;
+ return true;
+ }
}
else if (GET_CODE (parm_rtx) == PARALLEL)
{
@@ -14510,7 +14593,17 @@ s390_call_saved_register_used (tree call_expr)
gcc_assert (REG_NREGS (r) == 1);
if (!call_used_or_fixed_reg_p (REGNO (r)))
- return true;
+ {
+ rtx parm;
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && rtx_equal_p (parm_rtx, parm))
+ break;
+ return true;
+ }
}
}
}
@@ -14543,8 +14636,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp)
return false;
/* Register 6 on s390 is available as an argument register but unfortunately
- "caller saved". This makes functions needing this register for arguments
- not suitable for sibcalls. */
+ "caller saved". This makes functions needing this register for arguments
+ not suitable for sibcalls, unless the same value is passed from the
+ caller. */
return !s390_call_saved_register_used (exp);
}
@@ -15632,7 +15726,6 @@ s390_get_sched_attrmask (rtx_insn *insn)
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
if (get_attr_z16_cracked (insn))
mask |= S390_SCHED_ATTR_MASK_CRACKED;
if (get_attr_z16_expanded (insn))
@@ -15644,6 +15737,18 @@ s390_get_sched_attrmask (rtx_insn *insn)
if (get_attr_z16_groupoftwo (insn))
mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
break;
+ case PROCESSOR_9175_Z17:
+ if (get_attr_z17_cracked (insn))
+ mask |= S390_SCHED_ATTR_MASK_CRACKED;
+ if (get_attr_z17_expanded (insn))
+ mask |= S390_SCHED_ATTR_MASK_EXPANDED;
+ if (get_attr_z17_endgroup (insn))
+ mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
+ if (get_attr_z17_groupalone (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
+ if (get_attr_z17_groupoftwo (insn))
+ mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
+ break;
default:
gcc_unreachable ();
}
@@ -15691,7 +15796,6 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
mask |= 1 << 3;
break;
case PROCESSOR_3931_Z16:
- case PROCESSOR_ARCH15:
*units = 4;
if (get_attr_z16_unit_lsu (insn))
mask |= 1 << 0;
@@ -15702,6 +15806,17 @@ s390_get_unit_mask (rtx_insn *insn, int *units)
if (get_attr_z16_unit_vfu (insn))
mask |= 1 << 3;
break;
+ case PROCESSOR_9175_Z17:
+ *units = 4;
+ if (get_attr_z17_unit_lsu (insn))
+ mask |= 1 << 0;
+ if (get_attr_z17_unit_fxa (insn))
+ mask |= 1 << 1;
+ if (get_attr_z17_unit_fxb (insn))
+ mask |= 1 << 2;
+ if (get_attr_z17_unit_vfu (insn))
+ mask |= 1 << 3;
+ break;
default:
gcc_unreachable ();
}
@@ -15715,7 +15830,8 @@ s390_is_fpd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
- || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn);
+ || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn)
+ || get_attr_z17_unit_fpd (insn);
}
static bool
@@ -15725,7 +15841,8 @@ s390_is_fxd (rtx_insn *insn)
return false;
return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
- || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn);
+ || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn)
+ || get_attr_z17_unit_fxd (insn);
}
/* Returns TRUE if INSN is a long-running instruction. */
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 6f7195d..8b04bc9 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -45,12 +45,12 @@ enum processor_flags
PF_NNPA = 32768,
PF_Z16 = 65536,
PF_VXE3 = 131072,
- PF_ARCH15 = 262144
+ PF_Z17 = 262144
};
/* This is necessary to avoid a warning about comparing different enum
types. */
-#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_3931_Z16 ? PROCESSOR_3931_Z16 : s390_tune ))
+#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_9175_Z17 ? PROCESSOR_9175_Z17 : s390_tune ))
/* These flags indicate that the generated code should run on a cpu
providing the respective hardware facility regardless of the
@@ -124,10 +124,10 @@ enum processor_flags
(s390_arch_flags & PF_VXE3)
#define TARGET_CPU_VXE3_P(opts) \
(opts->x_s390_arch_flags & PF_VXE3)
-#define TARGET_CPU_ARCH15 \
- (s390_arch_flags & PF_ARCH15)
-#define TARGET_CPU_ARCH15_P(opts) \
- (opts->x_s390_arch_flags & PF_ARCH15)
+#define TARGET_CPU_Z17 \
+ (s390_arch_flags & PF_Z17)
+#define TARGET_CPU_Z17_P(opts) \
+ (opts->x_s390_arch_flags & PF_Z17)
#define TARGET_HARD_FLOAT_P(opts) (!TARGET_SOFT_FLOAT_P(opts))
@@ -198,9 +198,9 @@ enum processor_flags
(TARGET_VX && TARGET_CPU_VXE3)
#define TARGET_VXE3_P(opts) \
(TARGET_VX_P (opts) && TARGET_CPU_VXE3_P (opts))
-#define TARGET_ARCH15 (TARGET_ZARCH && TARGET_CPU_ARCH15)
-#define TARGET_ARCH15_P(opts) \
- (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH15_P (opts))
+#define TARGET_Z17 (TARGET_ZARCH && TARGET_CPU_Z17)
+#define TARGET_Z17_P(opts) \
+ (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_Z17_P (opts))
#if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13)
#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9d49580..97a4bdf 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -599,11 +599,11 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in s390.h.
-(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16"
+(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15,z16,z17"
(const (symbol_ref "s390_tune_attr")))
(define_attr "cpu_facility"
- "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,arch15"
+ "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2,z16,nnpa,vxe3,z17"
(const_string "standard"))
(define_attr "enabled" ""
@@ -681,8 +681,8 @@
(match_test "TARGET_VXE3"))
(const_int 1)
- (and (eq_attr "cpu_facility" "arch15")
- (match_test "TARGET_ARCH15"))
+ (and (eq_attr "cpu_facility" "z17")
+ (match_test "TARGET_Z17"))
(const_int 1)
]
(const_int 0)))
@@ -725,6 +725,9 @@
;; Pipeline description for z16
(include "3931.md")
+;; Pipeline description for z17
+(include "9175.md")
+
;; Predicates
(include "predicates.md")
@@ -990,6 +993,10 @@
(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")])
+(define_mode_iterator CC_SUZ [CCS CCU CCZ])
+(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")])
+(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")])
+
; Analogue to TOINTVEC / tointvec
(define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")])
(define_mode_attr toint [(TF "ti") (DF "di") (SF "si")])
@@ -2056,7 +2063,7 @@
[(set (match_operand:DI 0 "register_operand" "=d")
(ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a"))
(const_int LXAMODEITER)))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"lxa<lxamode>\t%0,0(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2066,7 +2073,7 @@
(ashift:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(const_int LXAMODEITER)))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxa<lxamode>\t%0,%2(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2076,7 +2083,7 @@
(plus:DI (ashift:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "a"))
(const_int LXAMODEITER))
(match_operand:DI 2 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"lxa<lxamode>\t%0,0(%1,%2)"
[(set_attr "op_type" "RXY")])
@@ -2087,7 +2094,7 @@
(match_operand:SI 2 "const_int_operand")))
(const_int LXAMODEITER))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxa<lxamode>\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2096,7 +2103,7 @@
(plus:DI (sign_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"lxab\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2113,7 +2120,7 @@
0)
(const_int LXAMODEITER))
(const_int <LLXAMASK>)))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxa<lxamode>\t%0,%2(%1,0)"
[(set_attr "op_type" "RXY")])
@@ -2124,7 +2131,7 @@
(const_int LXAMODEITER))
(const_int <LLXAMASK>))
(match_operand:DI 2 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"llxa<lxamode>\t%0,0(%1,%2)"
[(set_attr "op_type" "RXY")])
@@ -2137,7 +2144,7 @@
(const_int LXAMODEITER))
(const_int <LLXAMASK>))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxa<lxamode>\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -2146,7 +2153,7 @@
(plus:DI (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "const_int_operand")))
(match_operand:DI 3 "register_operand" "a")))]
- "TARGET_ARCH15 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
+ "TARGET_Z17 && TARGET_64BIT && INTVAL (operands[2]) >= -0x80000 && INTVAL (operands[2]) <= 0x7FFFF"
"llxab\t%0,%2(%1,%3)"
[(set_attr "op_type" "RXY")])
@@ -3594,7 +3601,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (match_dup 0) (match_dup 1))
@@ -3606,7 +3613,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -3620,14 +3627,14 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
- (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
(set (match_dup 0) (match_dup 1))
(use (const_int 1))])]
- "operands[3] = gen_label_rtx ();")
+ "operands[4] = gen_label_rtx ();")
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -3849,7 +3856,7 @@
(const_int 0))
(use (match_operand 1 "const_int_operand" ""))
(use (match_operand 2 "immediate_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3863,7 +3870,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (match_operand 2 "memory_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3879,7 +3886,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))
+ (clobber (match_scratch 2))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10 && reload_completed"
[(parallel
@@ -4044,7 +4051,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
@@ -4057,7 +4064,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -4072,7 +4079,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
@@ -4940,7 +4947,7 @@
(unspec:DI [(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_BDEPG))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"bdepg\t%0,%1,%2"
[(set_attr "op_type" "RRF")])
@@ -4953,7 +4960,7 @@
(unspec:DI [(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_BEXTG))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"bextg\t%0,%1,%2"
[(set_attr "op_type" "RRF")])
@@ -9580,7 +9587,7 @@
(clz:DI (match_operand:DI 1 "register_operand" "d")))]
"TARGET_EXTIMM && TARGET_ZARCH"
{
- if (!(TARGET_ARCH15 && TARGET_64BIT))
+ if (!(TARGET_Z17 && TARGET_64BIT))
{
rtx_insn *insn;
rtx clz_equal;
@@ -9601,7 +9608,7 @@
(define_insn "*clzg"
[(set (match_operand:DI 0 "register_operand" "=d")
(clz:DI (match_operand:DI 1 "register_operand" "d")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"clzg\t%0,%1"
[(set_attr "op_type" "RRE")])
@@ -9630,7 +9637,7 @@
(define_insn "ctzdi2"
[(set (match_operand:DI 0 "register_operand" "=d")
(ctz:DI (match_operand:DI 1 "register_operand" "d")))]
- "TARGET_ARCH15 && TARGET_64BIT"
+ "TARGET_Z17 && TARGET_64BIT"
"ctzg\t%0,%1"
[(set_attr "op_type" "RRE")])
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index f064597..6753a93 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -122,7 +122,10 @@ EnumValue
Enum(processor_type) String(z16) Value(PROCESSOR_3931_Z16)
EnumValue
-Enum(processor_type) String(arch15) Value(PROCESSOR_ARCH15)
+Enum(processor_type) String(arch15) Value(PROCESSOR_9175_Z17)
+
+EnumValue
+Enum(processor_type) String(z17) Value(PROCESSOR_9175_Z17)
EnumValue
Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index e29255f..6f4e192 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -75,6 +75,8 @@
V1DF V2DF
(V1TF "TARGET_VXE") (TF "TARGET_VXE")])
+(define_mode_iterator VF [(V2SF "TARGET_VXE") (V4SF "TARGET_VXE") V2DF])
+
; All modes present in V_HW1 and VFT.
(define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF
V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
@@ -506,26 +508,89 @@
UNSPEC_VEC_SET))]
"TARGET_VX")
+; Iterator for vec_set that does not use special float/vect overlay tricks
+(define_mode_iterator VEC_SET_NONFLOAT
+ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF V4SF])
+; Iterator for single element float vectors
+(define_mode_iterator VEC_SET_SINGLEFLOAT [(V1SF "TARGET_VXE") V1DF (V1TF "TARGET_VXE")])
+
; FIXME: Support also vector mode operands for 1
; FIXME: A target memory operand seems to be useful otherwise we end
; up with vl vlvgg vst. Shouldn't the middle-end be able to handle
; that itself?
; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih, vleif, vleig
(define_insn "*vec_set<mode>"
- [(set (match_operand:V 0 "register_operand" "=v,v,v")
- (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
- (match_operand:SI 2 "nonmemory_operand" "an,I,I")
- (match_operand:V 3 "register_operand" "0,0,0")]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:VEC_SET_NONFLOAT 0 "register_operand" "=v,v,v")
+ (unspec:VEC_SET_NONFLOAT
+ [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
+ (match_operand:SI 2 "nonmemory_operand" "an,I,I")
+ (match_operand:VEC_SET_NONFLOAT 3 "register_operand" "0,0,0")]
+ UNSPEC_VEC_SET))]
"TARGET_VX
&& (!CONST_INT_P (operands[2])
- || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
+ || UINTVAL (operands[2]) < GET_MODE_NUNITS (<VEC_SET_NONFLOAT:MODE>mode))"
"@
vlvg<bhfgq>\t%v0,%1,%Y2
vle<bhfgq>\t%v0,%1,%2
vlei<bhfgq>\t%v0,%1,%2"
[(set_attr "op_type" "VRS,VRX,VRI")])
+(define_insn "*vec_set<mode>"
+ [(set (match_operand:VEC_SET_SINGLEFLOAT 0 "register_operand" "=v,v")
+ (unspec:VEC_SET_SINGLEFLOAT
+ [(match_operand:<non_vec> 1 "general_operand" "v,R")
+ (match_operand:SI 2 "nonmemory_operand" "an,I")
+ (match_operand:VEC_SET_SINGLEFLOAT 3 "register_operand" "0,0")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX"
+ "@
+ vlr\t%v0,%v1
+ vle<bhfgq>\t%v0,%1,0"
+ [(set_attr "op_type" "VRR,VRX")])
+
+(define_insn "*vec_setv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=v,v,v,v")
+ (unspec:V2DF [(match_operand:DF 1 "general_operand" "d,R,K,v")
+ (match_operand:SI 2 "nonmemory_operand" "an,I,I,n")
+ (match_operand:V2DF 3 "register_operand" "0,0,0,0")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX
+ && (!CONST_INT_P (operands[2])
+ || UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))"
+ "@
+ vlvgg\t%v0,%1,%Y2
+ vleg\t%v0,%1,%2
+ vleig\t%v0,%1,%2
+ #"
+ [(set_attr "op_type" "VRS,VRX,VRI,*")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (unspec:V2DF [(match_operand:DF 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:V2DF 3 "register_operand" "")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX
+ && (UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))
+ && reload_completed
+ && VECTOR_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 0)
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_dup 1)
+ (match_dup 3))
+ (parallel [(const_int 0) (match_dup 4)])))]
+{
+ operands[1] = gen_rtx_REG (V2DFmode, REGNO (operands[1]));
+ if (UINTVAL (operands[2]) == 0)
+ operands[4] = GEN_INT (3);
+ else
+ {
+ std::swap (operands[1], operands[3]);
+ operands[4] = GEN_INT (2);
+ }
+})
+
; vlvgb, vlvgh, vlvgf, vlvgg
(define_insn "*vec_set<mode>_plus"
[(set (match_operand:V 0 "register_operand" "=v")
@@ -538,6 +603,14 @@
"vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
[(set_attr "op_type" "VRS")])
+(define_expand "cstoreti4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:TI 2 "register_operand")
+ (match_operand:TI 3 "register_operand")]))]
+ "TARGET_VX"
+ "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
;; FIXME: Support also vector mode operands for 0
;; This is used via RTL standard name as well as for expanding the builtin
@@ -554,18 +627,66 @@
(define_insn "*vec_extract<mode>"
[(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R")
(vec_select:<non_vec>
- (match_operand:V 1 "nonmemory_operand" "v,v")
+ (match_operand:VI 1 "nonmemory_operand" "v,v")
(parallel
[(match_operand:SI 2 "nonmemory_operand" "an,I")])))]
"TARGET_VX"
{
if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<V:MODE>mode) - 1));
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VI:MODE>mode) - 1));
if (which_alternative == 0)
return "vlgv<bhfgq>\t%0,%v1,%Y2";
return "vste<bhfgq>\t%v1,%0,%2";
}
- [(set_attr "op_type" "VRS,VRX")])
+ [(set_attr "op_type" "VRS,VRX")
+ (set_attr "mnemonic" "vlgv<bhfgq>,vste<bhfgq>")])
+
+(define_insn "*vec_extract<mode>"
+ [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R,v")
+ (vec_select:<non_vec>
+ (match_operand:VF 1 "nonmemory_operand" "v,v,v")
+ (parallel
+ [(match_operand:SI 2 "nonmemory_operand" "an,I,n")])))]
+ "TARGET_VX"
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1));
+ if (which_alternative == 0)
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+ else if (which_alternative == 1)
+ return "vste<bhfgq>\t%v1,%0,%2";
+ else
+ return "#";
+ }
+ [(set_attr "op_type" "VRS,VRX,*")
+ (set_attr "mnemonic" "vlgv<bhfgq>,vste<bhfgq>,*")])
+
+(define_split
+ [(set (match_operand:<non_vec> 0 "register_operand" "")
+ (vec_select:<non_vec>
+ (match_operand:VF 1 "register_operand" "")
+ (parallel
+ [(match_operand:SI 2 "const_int_operand" "")])))]
+ "TARGET_VX && reload_completed && VECTOR_REGNO_P (REGNO (operands[0]))"
+ [(set (match_dup 0)
+ (vec_duplicate:VF
+ (vec_select:<non_vec>
+ (match_dup 1)
+ (parallel [(match_dup 2)]))))]
+{
+ unsigned HOST_WIDE_INT idx = UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1);
+ if (idx == 0)
+ {
+ rtx dest = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+ emit_insn (gen_mov<VF:mode> (dest, operands[1]));
+ DONE;
+ }
+ else
+ {
+ operands[0] = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+ operands[2] = GEN_INT (idx);
+ }
+})
; vlgvb, vlgvh, vlgvf, vlgvg
(define_insn "*vec_extract<mode>_plus"
@@ -603,10 +724,10 @@
; Replicate from vector element
; vrepb, vreph, vrepf, vrepg
(define_insn "*vec_splat<mode>"
- [(set (match_operand:V_128_NOSINGLE 0 "register_operand" "=v")
- (vec_duplicate:V_128_NOSINGLE
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (vec_duplicate:V
(vec_select:<non_vec>
- (match_operand:V_128_NOSINGLE 1 "register_operand" "v")
+ (match_operand:V 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "const_mask_operand" "C")]))))]
"TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
@@ -945,7 +1066,7 @@
else
{
reg_pair += 2; // get rid of prefix %f
- snprintf (buf, sizeof (buf), "ldr\t%%f0,%%f1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+ snprintf (buf, sizeof (buf), "vlr\t%%v0,%%v1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
output_asm_insn (buf, operands);
return "";
}
@@ -2209,6 +2330,28 @@
operands[5] = gen_reg_rtx (V2DImode);
})
+(define_insn "*vec_cmpv2di_lane0_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (vec_select:DI
+ (match_operand:V2DI 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "v")
+ (parallel [(const_int 0)]))))]
+ "TARGET_VX"
+ "vec<l>g\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "*vec_cmpti_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (match_operand:TI 0 "register_operand" "v")
+ (match_operand:TI 1 "register_operand" "v")))]
+ "TARGET_VXE3"
+ "vec<l>q\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
;;
;; Floating point compares
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
index 80650b4..e31ae69 100644
--- a/gcc/config/sh/sh-modes.def
+++ b/gcc/config/sh/sh-modes.def
@@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* SH has the same reversed quiet bit as MIPS. */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
/* Vector modes. */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODES (INT, 4); /* V4QI V2HI */
diff --git a/gcc/config/sh/sh_treg_combine.cc b/gcc/config/sh/sh_treg_combine.cc
index 3dbd6c3..696fe32 100644
--- a/gcc/config/sh/sh_treg_combine.cc
+++ b/gcc/config/sh/sh_treg_combine.cc
@@ -945,10 +945,7 @@ sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const
else
gcc_unreachable ();
- rtx i = get_insns ();
- end_sequence ();
-
- return i;
+ return end_sequence ();
}
rtx_insn *
diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc
index 2196a0c..ffd1fb9 100644
--- a/gcc/config/sparc/sparc.cc
+++ b/gcc/config/sparc/sparc.cc
@@ -4762,8 +4762,7 @@ sparc_legitimize_tls_address (rtx addr)
addr, const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
RTL_CONST_CALL_P (insn) = 1;
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_libcall_block (insn, ret, o0, addr);
break;
@@ -4782,8 +4781,7 @@ sparc_legitimize_tls_address (rtx addr)
const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
RTL_CONST_CALL_P (insn) = 1;
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
emit_libcall_block (insn, temp3, o0,
@@ -12530,8 +12528,7 @@ sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
if (!TARGET_VXWORKS_RTP)
pic_offset_table_rtx = got_register_rtx;
scratch = sparc_legitimize_pic_address (funexp, scratch);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
}
else if (TARGET_ARCH32)
@@ -12557,8 +12554,7 @@ sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
start_sequence ();
sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_and_preserve (seq, spill_reg, 0);
break;
@@ -13242,8 +13238,7 @@ sparc_init_pic_reg (void)
load_got_register ();
if (!TARGET_VXWORKS_RTP)
emit_move_insn (pic_offset_table_rtx, got_register_rtx);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 4d46cfd..c6e06b4 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -3014,17 +3014,18 @@
rtx shift_16 = GEN_INT (16);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_16));
- emit_insn (gen_lshrsi3 (operand0, temp, shift_16));
+ emit_insn (gen_lshrsi3 (operands[0], temp, shift_16));
DONE;
})
@@ -3097,17 +3098,18 @@
rtx shift_48 = GEN_INT (48);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_48));
- emit_insn (gen_lshrdi3 (operand0, temp, shift_48));
+ emit_insn (gen_lshrdi3 (operands[0], temp, shift_48));
DONE;
})
@@ -3283,17 +3285,18 @@
rtx shift_16 = GEN_INT (16);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_16));
- emit_insn (gen_ashrsi3 (operand0, temp, shift_16));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_16));
DONE;
})
@@ -3315,25 +3318,26 @@
int op1_subbyte = 0;
int op0_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- if (GET_CODE (operand0) == SUBREG)
+ if (GET_CODE (operands[0]) == SUBREG)
{
- op0_subbyte = SUBREG_BYTE (operand0);
+ op0_subbyte = SUBREG_BYTE (operands[0]);
op0_subbyte /= GET_MODE_SIZE (SImode);
op0_subbyte *= GET_MODE_SIZE (SImode);
- operand0 = XEXP (operand0, 0);
+ operands[0] = XEXP (operands[0], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_24));
- if (GET_MODE (operand0) != SImode)
- operand0 = gen_rtx_SUBREG (SImode, operand0, op0_subbyte);
- emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ if (GET_MODE (operands[0]) != SImode)
+ operands[0] = gen_rtx_SUBREG (SImode, operands[0], op0_subbyte);
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_24));
DONE;
})
@@ -3354,17 +3358,18 @@
rtx shift_24 = GEN_INT (24);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_24));
- emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_24));
DONE;
})
@@ -3385,17 +3390,18 @@
rtx shift_56 = GEN_INT (56);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_56));
- emit_insn (gen_ashrdi3 (operand0, temp, shift_56));
+ emit_insn (gen_ashrdi3 (operands[0], temp, shift_56));
DONE;
})
@@ -3416,17 +3422,18 @@
rtx shift_48 = GEN_INT (48);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_48));
- emit_insn (gen_ashrdi3 (operand0, temp, shift_48));
+ emit_insn (gen_ashrdi3 (operands[0], temp, shift_48));
DONE;
})
diff --git a/gcc/config/stormy16/stormy16.cc b/gcc/config/stormy16/stormy16.cc
index ba2c8cd..5b92743 100644
--- a/gcc/config/stormy16/stormy16.cc
+++ b/gcc/config/stormy16/stormy16.cc
@@ -405,8 +405,7 @@ xstormy16_split_cbranch (machine_mode mode, rtx label, rtx comparison,
start_sequence ();
xstormy16_expand_arith (mode, COMPARE, dest, op0, op1);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
gcc_assert (INSN_P (seq));
diff --git a/gcc/config/stormy16/stormy16.md b/gcc/config/stormy16/stormy16.md
index 70c8282..15c60ad 100644
--- a/gcc/config/stormy16/stormy16.md
+++ b/gcc/config/stormy16/stormy16.md
@@ -702,8 +702,7 @@
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(neg:SI (match_operand:SI 1 "register_operand" "")))
(clobber (reg:BI CARRY_REG))])]
- ""
- { operands[2] = gen_reg_rtx (HImode); })
+ "")
(define_insn_and_split "*negsi2_internal"
[(set (match_operand:SI 0 "register_operand" "=&r")
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index aad4146..9aeaba6 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -183,19 +183,6 @@
(and (match_code "const_int")
(match_test "xtensa_mem_offset (INTVAL (op), SFmode)")))
-(define_predicate "reload_operand"
- (match_code "mem")
-{
- const_rtx addr = XEXP (op, 0);
- if (REG_P (addr))
- return REGNO (addr) == A1_REG;
- if (GET_CODE (addr) == PLUS)
- return REG_P (XEXP (addr, 0))
- && REGNO (XEXP (addr, 0)) == A1_REG
- && CONST_INT_P (XEXP (addr, 1));
- return false;
-})
-
(define_predicate "branch_operator"
(match_code "eq,ne,lt,ge"))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 53db06e..92a2364 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -48,7 +48,6 @@ along with GCC; see the file COPYING3. If not see
#include "alias.h"
#include "explow.h"
#include "expr.h"
-#include "reload.h"
#include "langhooks.h"
#include "gimplify.h"
#include "builtins.h"
@@ -197,6 +196,9 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
tree function);
static rtx xtensa_delegitimize_address (rtx);
+static reg_class_t xtensa_ira_change_pseudo_allocno_class (int, reg_class_t,
+ reg_class_t);
+static HARD_REG_SET xtensa_zero_call_used_regs (HARD_REG_SET);
@@ -366,6 +368,12 @@ static rtx xtensa_delegitimize_address (rtx);
#undef TARGET_DIFFERENT_ADDR_DISPLACEMENT_P
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS xtensa_ira_change_pseudo_allocno_class
+
+#undef TARGET_ZERO_CALL_USED_REGS
+#define TARGET_ZERO_CALL_USED_REGS xtensa_zero_call_used_regs
+
struct gcc_target targetm = TARGET_INITIALIZER;
@@ -1482,8 +1490,7 @@ xtensa_copy_incoming_a7 (rtx opnd)
if (mode == DFmode || mode == DImode)
emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0),
gen_rtx_REG (SImode, A7_REG - 1)));
- entry_insns = get_insns ();
- end_sequence ();
+ entry_insns = end_sequence ();
if (cfun->machine->vararg_a7)
{
@@ -1644,8 +1651,7 @@ xtensa_expand_block_set_libcall (rtx dst_mem,
GEN_INT (value), SImode,
GEN_INT (bytes), SImode);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -1706,8 +1712,7 @@ xtensa_expand_block_set_unrolled_loop (rtx dst_mem,
}
while (bytes > 0);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -1788,8 +1793,7 @@ xtensa_expand_block_set_small_loop (rtx dst_mem,
emit_insn (gen_addsi3 (dst, dst, GEN_INT (align)));
emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -2467,8 +2471,7 @@ xtensa_call_tls_desc (rtx sym, rtx *retp)
emit_move_insn (a_io, arg);
call_insn = emit_call_insn (gen_tls_call (a_io, fn, sym, const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), a_io);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
*retp = a_io;
return insns;
@@ -3048,6 +3051,8 @@ xtensa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
'K' CONST_INT, print number of bits in mask for EXTUI
'R' CONST_INT, print (X & 0x1f)
'L' CONST_INT, print ((32 - X) & 0x1f)
+   'U' CONST_DOUBLE:SF, print (REAL_EXP (rval) - 1)
+   'V' CONST_DOUBLE:SF, print (1 - REAL_EXP (rval))
'D' REG, print second register of double-word register operand
'N' MEM, print address of next word following a memory operand
'v' MEM, if memory reference is volatile, output a MEMW before it
@@ -3144,6 +3149,20 @@ print_operand (FILE *file, rtx x, int letter)
output_operand_lossage ("invalid %%R value");
break;
+ case 'U':
+ if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
+ fprintf (file, "%d", REAL_EXP (CONST_DOUBLE_REAL_VALUE (x)) - 1);
+ else
+ output_operand_lossage ("invalid %%U value");
+ break;
+
+ case 'V':
+ if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
+ fprintf (file, "%d", 1 - REAL_EXP (CONST_DOUBLE_REAL_VALUE (x)));
+ else
+ output_operand_lossage ("invalid %%V value");
+ break;
+
case 'x':
if (CONST_INT_P (x))
printx (file, INTVAL (x));
@@ -4430,17 +4449,25 @@ static int
xtensa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
- if (from == to && from != BR_REGS && to != BR_REGS)
+  /* If both classes are the same (except for BR_REGS) or both are subsets
+     of AR_REGS, the cost is 2 (the default value).  */
+ if ((from == to && from != BR_REGS && to != BR_REGS)
+ || (reg_class_subset_p (from, AR_REGS)
+ && reg_class_subset_p (to, AR_REGS)))
return 2;
- else if (reg_class_subset_p (from, AR_REGS)
- && reg_class_subset_p (to, AR_REGS))
+
+  /* The cost between AR_REGS and FP_REGS is 2 (the default value).  */
+ if ((reg_class_subset_p (from, AR_REGS) && to == FP_REGS)
+ || (from == FP_REGS && reg_class_subset_p (to, AR_REGS)))
return 2;
- else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
- return 3;
- else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS))
+
+ if ((reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
+ || (from == ACC_REG && reg_class_subset_p (to, AR_REGS)))
return 3;
- else
- return 10;
+
+  /* Otherwise, the cost exceeds 2x the default MEMORY_MOVE_COST, so the
+     register allocator will prefer spilling to the stack.  */
+ return 10;
}
/* Compute a (partial) cost for rtx X. Return true if the complete
@@ -5428,4 +5455,72 @@ xtensa_delegitimize_address (rtx op)
return op;
}
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS, in order to tell
+ the register allocator to avoid using ALL_REGS rclass. */
+
+static reg_class_t
+xtensa_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
+ reg_class_t best_class)
+{
+ if (allocno_class != ALL_REGS)
+ return allocno_class;
+
+ if (best_class != ALL_REGS)
+ return best_class;
+
+ return FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) ? FP_REGS : AR_REGS;
+}
+
+/* Implement TARGET_ZERO_CALL_USED_REGS. */
+
+static HARD_REG_SET
+xtensa_zero_call_used_regs (HARD_REG_SET selected_regs)
+{
+ unsigned int regno;
+ int zeroed_regno = -1;
+ hard_reg_set_iterator hrsi;
+ rtvec argvec, convec;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (selected_regs, 1, regno, hrsi)
+ {
+ if (GP_REG_P (regno))
+ {
+ emit_move_insn (gen_rtx_REG (SImode, regno), const0_rtx);
+ if (zeroed_regno < 0)
+ zeroed_regno = regno;
+ continue;
+ }
+ if (TARGET_BOOLEANS && BR_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ argvec = rtvec_alloc (1);
+ RTVEC_ELT (argvec, 0) = gen_rtx_REG (SImode, zeroed_regno);
+ convec = rtvec_alloc (1);
+ RTVEC_ELT (convec, 0) = gen_rtx_ASM_INPUT (SImode, "r");
+ emit_insn (gen_rtx_ASM_OPERANDS (VOIDmode, "wsr\t%0, BR",
+ "", 0, argvec, convec,
+ rtvec_alloc (0),
+ UNKNOWN_LOCATION));
+ continue;
+ }
+ if (TARGET_HARD_FLOAT && FP_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ emit_move_insn (gen_rtx_REG (SFmode, regno),
+ gen_rtx_REG (SFmode, zeroed_regno));
+ continue;
+ }
+ if (TARGET_MAC16 && ACC_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ emit_move_insn (gen_rtx_REG (SImode, regno),
+ gen_rtx_REG (SImode, zeroed_regno));
+ continue;
+ }
+ CLEAR_HARD_REG_BIT (selected_regs, regno);
+ }
+
+ return selected_regs;
+}
+
#include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 9009db0..a8a0565 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_MINMAX XCHAL_HAVE_MINMAX
#define TARGET_SEXT XCHAL_HAVE_SEXT
#define TARGET_CLAMPS XCHAL_HAVE_CLAMPS
+#define TARGET_DEPBITS XCHAL_HAVE_DEPBITS
#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS
#define TARGET_HARD_FLOAT XCHAL_HAVE_FP
#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 88f011c..029be99 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -41,6 +41,8 @@
UNSPEC_LSETUP_START
UNSPEC_LSETUP_END
UNSPEC_FRAME_BLOCKAGE
+ UNSPEC_CEIL
+ UNSPEC_FLOOR
])
(define_c_enum "unspecv" [
@@ -103,6 +105,11 @@
(define_code_attr m_float [(float "float") (unsigned_float "ufloat")])
(define_code_attr s_float [(float "") (unsigned_float "uns")])
+;; This iterator and attribute allow FP-to-integer conversions with two
+;; rounding modes (ceiling and floor) to be generated from one template.
+(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR])
+(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")])
+
;; Attributes.
@@ -1007,7 +1014,7 @@
(set_attr "length" "3")])
-;; Field extract instructions.
+;; Field extract and insert instructions.
(define_expand "extvsi"
[(set (match_operand:SI 0 "register_operand" "")
@@ -1141,6 +1148,25 @@
(set_attr "mode" "SI")
(set_attr "length" "6")])
+(define_insn "insvsi"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+a")
+ (match_operand:SI 1 "extui_fldsz_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 3 "register_operand" "r"))]
+ "TARGET_DEPBITS"
+{
+ int shift;
+ if (BITS_BIG_ENDIAN)
+ shift = (32 - (INTVAL (operands[1]) + INTVAL (operands[2]))) & 0x1f;
+ else
+ shift = INTVAL (operands[2]) & 0x1f;
+ operands[2] = GEN_INT (shift);
+ return "depbits\t%0, %3, %2, %1";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
;; Conversions.
@@ -1168,12 +1194,7 @@
(any_fix:SI (mult:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "fix_scaling_operand" "F"))))]
"TARGET_HARD_FLOAT"
-{
- static char result[64];
- sprintf (result, "<m_fix>.s\t%%0, %%1, %d",
- REAL_EXP (CONST_DOUBLE_REAL_VALUE (operands[2])) - 1);
- return result;
-}
+ "<m_fix>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
(set_attr "length" "3")])
@@ -1192,12 +1213,36 @@
(mult:SF (any_float:SF (match_operand:SI 1 "register_operand" "a"))
(match_operand:SF 2 "float_scaling_operand" "F")))]
"TARGET_HARD_FLOAT"
-{
- static char result[64];
- sprintf (result, "<m_float>.s\t%%0, %%1, %d",
- 1 - REAL_EXP (CONST_DOUBLE_REAL_VALUE (operands[2])));
- return result;
-}
+ "<m_float>.s\t%0, %1, %V2"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "l<m_round>sfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SF 1 "register_operand" "f")] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*l<m_round>sfsi2_2x"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(plus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_dup 1))] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, 1"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*l<m_round>sfsi2_scaled"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "fix_scaling_operand" "F"))] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
(set_attr "length" "3")])
@@ -1453,7 +1498,7 @@
})
(define_insn "movsf_internal"
- [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,^U,D,a,D,R,a,f,a,a,W,a,U")
+ [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,a,D,R,a,f,a,a,W,a,U")
(match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))]
"((register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
@@ -3319,36 +3364,6 @@
(const_int 8)
(const_int 9))))])
-(define_peephole2
- [(set (match_operand:SI 0 "register_operand")
- (match_operand:SI 6 "reload_operand"))
- (set (match_operand:SI 1 "register_operand")
- (match_operand:SI 7 "reload_operand"))
- (set (match_operand:SF 2 "register_operand")
- (match_operand:SF 4 "register_operand"))
- (set (match_operand:SF 3 "register_operand")
- (match_operand:SF 5 "register_operand"))]
- "REGNO (operands[0]) == REGNO (operands[4])
- && REGNO (operands[1]) == REGNO (operands[5])
- && peep2_reg_dead_p (4, operands[0])
- && peep2_reg_dead_p (4, operands[1])"
- [(set (match_dup 2)
- (match_dup 6))
- (set (match_dup 3)
- (match_dup 7))]
-{
- HARD_REG_SET regs;
- int i;
- CLEAR_HARD_REG_SET (regs);
- for (i = 0; i <= 3; ++i)
- if (TEST_HARD_REG_BIT (regs, REGNO (operands[i])))
- FAIL;
- else
- SET_HARD_REG_BIT (regs, REGNO (operands[i]));
- operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0));
- operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0));
-})
-
(define_split
[(clobber (match_operand 0 "register_operand"))]
"HARD_REGISTER_P (operands[0])
@@ -3434,49 +3449,3 @@ FALLTHRU:;
operands[1] = GEN_INT (imm0);
operands[2] = GEN_INT (imm1);
})
-
-(define_peephole2
- [(set (match_operand 0 "register_operand")
- (match_operand 1 "register_operand"))]
- "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0]))
- && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1]))
- && peep2_reg_dead_p (1, operands[1])"
- [(const_int 0)]
-{
- basic_block bb = BLOCK_FOR_INSN (curr_insn);
- rtx_insn *head = BB_HEAD (bb), *insn;
- rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig;
- for (insn = PREV_INSN (curr_insn);
- insn && insn != head;
- insn = PREV_INSN (insn))
- if (CALL_P (insn))
- break;
- else if (INSN_P (insn))
- {
- if (GET_CODE (pattern = PATTERN (insn)) == SET
- && REG_P (t_dest = SET_DEST (pattern))
- && REG_NREGS (t_dest) == 1
- && REGNO (t_dest) == REGNO (src))
- {
- dest_orig = SET_DEST (pattern);
- SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest),
- REGNO (dest));
- extract_insn (insn);
- if (!constrain_operands (true, get_enabled_alternatives (insn)))
- {
- SET_DEST (pattern) = dest_orig;
- goto ABORT;
- }
- df_insn_rescan (insn);
- goto FALLTHRU;
- }
- if (reg_overlap_mentioned_p (dest, pattern)
- || reg_overlap_mentioned_p (src, pattern)
- || set_of (dest, insn)
- || set_of (src, insn))
- break;
- }
-ABORT:
- FAIL;
-FALLTHRU:;
-})