author    Thomas Koenig <tkoenig@gcc.gnu.org>       2020-10-28 18:41:24 +0100
committer Thomas Koenig <tkoenig@gcc.gnu.org>       2020-10-28 18:41:24 +0100
commit    bf6dad60c338a42a7fb85f7b2a5870c0fb2e20f8 (patch)
tree      e513781ef717465e7db0358e987a5a6cbef5665c /gcc/config
parent    0c261d5b5c931d9e9214d06531bdc7e9e16aeaab (diff)
parent    47d13acbda9a5d8eb57ff169ba74857cd54108e4 (diff)
Merge branch 'master' into devel/coarray_native.
Merge into devel/coarray_native to prepare for later merging of coarray_native with master.
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 59
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def | 9
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 2
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 21
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 93
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 923
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md | 73
-rw-r--r--  gcc/config/aarch64/aarch64-tune.md | 2
-rw-r--r--  gcc/config/aarch64/aarch64.c | 243
-rw-r--r--  gcc/config/aarch64/aarch64.md | 102
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 264
-rw-r--r--  gcc/config/arm/arm-builtins.c | 8
-rw-r--r--  gcc/config/arm/arm-cpus.in | 83
-rw-r--r--  gcc/config/arm/arm-protos.h | 3
-rw-r--r--  gcc/config/arm/arm-tables.opt | 15
-rw-r--r--  gcc/config/arm/arm-tune.md | 6
-rw-r--r--  gcc/config/arm/arm.c | 191
-rw-r--r--  gcc/config/arm/arm.h | 41
-rw-r--r--  gcc/config/arm/arm.md | 12
-rw-r--r--  gcc/config/arm/arm_mve.h | 592
-rw-r--r--  gcc/config/arm/arm_mve_builtins.def | 16
-rw-r--r--  gcc/config/arm/constraints.md | 9
-rw-r--r--  gcc/config/arm/iterators.md | 475
-rw-r--r--  gcc/config/arm/mve.md | 967
-rw-r--r--  gcc/config/arm/neon.md | 423
-rw-r--r--  gcc/config/arm/parsecpu.awk | 51
-rw-r--r--  gcc/config/arm/unspecs.md | 800
-rw-r--r--  gcc/config/arm/vec-common.md | 97
-rw-r--r--  gcc/config/arm/vfp.md | 36
-rw-r--r--  gcc/config/arm/vxworks.h | 2
-rw-r--r--  gcc/config/bpf/bpf.md | 20
-rw-r--r--  gcc/config/darwin-protos.h | 2
-rw-r--r--  gcc/config/darwin-sections.def | 15
-rw-r--r--  gcc/config/darwin.c | 142
-rw-r--r--  gcc/config/darwin.h | 3
-rw-r--r--  gcc/config/darwin9.h | 3
-rw-r--r--  gcc/config/gcn/gcn.md | 43
-rw-r--r--  gcc/config/gcn/mkoffload.c | 7
-rw-r--r--  gcc/config/i386/adxintrin.h | 4
-rw-r--r--  gcc/config/i386/amxbf16intrin.h | 52
-rw-r--r--  gcc/config/i386/amxint8intrin.h | 61
-rw-r--r--  gcc/config/i386/amxtileintrin.h | 98
-rw-r--r--  gcc/config/i386/avx2intrin.h | 3
-rw-r--r--  gcc/config/i386/avx512bwintrin.h | 32
-rw-r--r--  gcc/config/i386/avx512dqintrin.h | 186
-rw-r--r--  gcc/config/i386/avx512erintrin.h | 148
-rw-r--r--  gcc/config/i386/avx512fintrin.h | 163
-rw-r--r--  gcc/config/i386/avx512vlbwintrin.h | 65
-rw-r--r--  gcc/config/i386/avx512vlintrin.h | 85
-rw-r--r--  gcc/config/i386/avx512vp2intersectintrin.h | 23
-rw-r--r--  gcc/config/i386/avx512vp2intersectvlintrin.h | 23
-rw-r--r--  gcc/config/i386/avxintrin.h | 7
-rw-r--r--  gcc/config/i386/bmi2intrin.h | 4
-rw-r--r--  gcc/config/i386/bmiintrin.h | 4
-rw-r--r--  gcc/config/i386/cetintrin.h | 4
-rw-r--r--  gcc/config/i386/cldemoteintrin.h | 4
-rw-r--r--  gcc/config/i386/clflushoptintrin.h | 4
-rw-r--r--  gcc/config/i386/clwbintrin.h | 4
-rw-r--r--  gcc/config/i386/constraints.md | 4
-rw-r--r--  gcc/config/i386/cpuid.h | 5
-rw-r--r--  gcc/config/i386/emmintrin.h | 25
-rw-r--r--  gcc/config/i386/enqcmdintrin.h | 10
-rw-r--r--  gcc/config/i386/fxsrintrin.h | 4
-rw-r--r--  gcc/config/i386/hresetintrin.h | 48
-rw-r--r--  gcc/config/i386/i386-builtin-types.def | 5
-rw-r--r--  gcc/config/i386/i386-builtin.def | 18
-rw-r--r--  gcc/config/i386/i386-builtins.c | 5
-rw-r--r--  gcc/config/i386/i386-builtins.h | 1
-rw-r--r--  gcc/config/i386/i386-c.c | 14
-rw-r--r--  gcc/config/i386/i386-expand.c | 36
-rw-r--r--  gcc/config/i386/i386-options.c | 56
-rw-r--r--  gcc/config/i386/i386.c | 107
-rw-r--r--  gcc/config/i386/i386.h | 36
-rw-r--r--  gcc/config/i386/i386.md | 81
-rw-r--r--  gcc/config/i386/i386.opt | 22
-rw-r--r--  gcc/config/i386/ia32intrin.h | 4
-rw-r--r--  gcc/config/i386/immintrin.h | 207
-rw-r--r--  gcc/config/i386/intelmic-mkoffload.c | 12
-rw-r--r--  gcc/config/i386/lwpintrin.h | 4
-rw-r--r--  gcc/config/i386/lzcntintrin.h | 4
-rw-r--r--  gcc/config/i386/mingw-w64.h | 6
-rw-r--r--  gcc/config/i386/mmintrin.h | 6
-rw-r--r--  gcc/config/i386/movdirintrin.h | 4
-rw-r--r--  gcc/config/i386/pconfigintrin.h | 27
-rw-r--r--  gcc/config/i386/pkuintrin.h | 4
-rw-r--r--  gcc/config/i386/predicates.md | 13
-rw-r--r--  gcc/config/i386/rdseedintrin.h | 4
-rw-r--r--  gcc/config/i386/rtmintrin.h | 4
-rw-r--r--  gcc/config/i386/serializeintrin.h | 4
-rw-r--r--  gcc/config/i386/sse.md | 401
-rw-r--r--  gcc/config/i386/subst.md | 4
-rw-r--r--  gcc/config/i386/t-i386 | 3
-rw-r--r--  gcc/config/i386/t-rtems | 8
-rw-r--r--  gcc/config/i386/tbmintrin.h | 4
-rw-r--r--  gcc/config/i386/tsxldtrkintrin.h | 27
-rw-r--r--  gcc/config/i386/uintrintrin.h | 87
-rw-r--r--  gcc/config/i386/waitpkgintrin.h | 4
-rw-r--r--  gcc/config/i386/wbnoinvdintrin.h | 27
-rw-r--r--  gcc/config/i386/x86gprintrin.h | 256
-rw-r--r--  gcc/config/i386/x86intrin.h | 12
-rw-r--r--  gcc/config/i386/xsavecintrin.h | 4
-rw-r--r--  gcc/config/i386/xsaveintrin.h | 4
-rw-r--r--  gcc/config/i386/xsaveoptintrin.h | 4
-rw-r--r--  gcc/config/i386/xsavesintrin.h | 4
-rw-r--r--  gcc/config/i386/xtestintrin.h | 4
-rw-r--r--  gcc/config/linux-protos.h | 2
-rw-r--r--  gcc/config/linux.c | 3
-rw-r--r--  gcc/config/msp430/msp430.md | 4
-rw-r--r--  gcc/config/nvptx/mkoffload.c | 7
-rw-r--r--  gcc/config/nvptx/nvptx.c | 128
-rw-r--r--  gcc/config/nvptx/nvptx.h | 5
-rw-r--r--  gcc/config/nvptx/nvptx.md | 28
-rw-r--r--  gcc/config/nvptx/nvptx.opt | 13
-rw-r--r--  gcc/config/pa/pa-hpux11.h | 5
-rw-r--r--  gcc/config/pa/pa32-linux.h | 5
-rw-r--r--  gcc/config/pa/pa64-hpux.h | 12
-rwxr-xr-x  gcc/config/riscv/multilib-generator | 133
-rw-r--r--  gcc/config/riscv/riscv-c.c | 9
-rw-r--r--  gcc/config/riscv/riscv-cores.def | 49
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 14
-rw-r--r--  gcc/config/riscv/riscv.c | 97
-rw-r--r--  gcc/config/riscv/riscv.h | 25
-rw-r--r--  gcc/config/riscv/riscv.opt | 4
-rw-r--r--  gcc/config/riscv/t-riscv | 2
-rw-r--r--  gcc/config/rs6000/altivec.h | 3
-rw-r--r--  gcc/config/rs6000/freebsd64.h | 60
-rw-r--r--  gcc/config/rs6000/linux64.h | 85
-rw-r--r--  gcc/config/rs6000/ppc-asm.h | 9
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def | 44
-rw-r--r--  gcc/config/rs6000/rs6000-c.c | 3
-rw-r--r--  gcc/config/rs6000/rs6000-call.c | 220
-rw-r--r--  gcc/config/rs6000/rs6000-internal.h | 2
-rw-r--r--  gcc/config/rs6000/rs6000-logue.c | 15
-rw-r--r--  gcc/config/rs6000/rs6000.c | 131
-rw-r--r--  gcc/config/rs6000/rs6000.md | 105
-rw-r--r--  gcc/config/rs6000/smmintrin.h | 30
-rw-r--r--  gcc/config/rs6000/vsx.md | 61
-rw-r--r--  gcc/config/rs6000/vxworks.h | 290
-rw-r--r--  gcc/config/s390/s390-protos.h | 1
-rw-r--r--  gcc/config/s390/s390.c | 70
-rw-r--r--  gcc/config/s390/s390.md | 54
-rw-r--r--  gcc/config/s390/vector.md | 52
-rw-r--r--  gcc/config/t-vxworks | 2
-rw-r--r--  gcc/config/vx-common.h | 2
-rw-r--r--  gcc/config/vxworks.c | 6
-rw-r--r--  gcc/config/vxworks.h | 11
-rw-r--r--  gcc/config/vxworks/_vxworks-versions.h | 20
147 files changed, 6989 insertions(+), 3292 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4f33dd9..732a4dc 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2024,7 +2024,7 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
return target;
}
-/* Expand an expression EXP as fpsr or cpsr setter (depending on
+/* Expand an expression EXP as fpsr or fpcr setter (depending on
UNSPEC) using MODE. */
static void
aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
@@ -2034,6 +2034,18 @@ aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
emit_insn (gen_aarch64_set (unspec, mode, op));
}
+/* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE.
+ Return the target. */
+static rtx
+aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
+ rtx target)
+{
+ expand_operand op;
+ create_output_operand (&op, target, mode);
+ expand_insn (icode, 1, &op);
+ return op.value;
+}
+
/* Expand an expression EXP that calls built-in function FCODE,
with result going to TARGET if that's convenient. IGNORE is true
if the result of the builtin is ignored. */
@@ -2048,26 +2060,26 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
switch (fcode)
{
case AARCH64_BUILTIN_GET_FPCR:
- emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, SImode, target));
- return target;
+ return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
+ SImode, target);
case AARCH64_BUILTIN_SET_FPCR:
aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
return target;
case AARCH64_BUILTIN_GET_FPSR:
- emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, SImode, target));
- return target;
+ return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
+ SImode, target);
case AARCH64_BUILTIN_SET_FPSR:
aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
return target;
case AARCH64_BUILTIN_GET_FPCR64:
- emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, DImode, target));
- return target;
+ return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
+ DImode, target);
case AARCH64_BUILTIN_SET_FPCR64:
aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
return target;
case AARCH64_BUILTIN_GET_FPSR64:
- emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, DImode, target));
- return target;
+ return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
+ DImode, target);
case AARCH64_BUILTIN_SET_FPSR64:
aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
return target;
@@ -2079,20 +2091,13 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = force_reg (Pmode, expand_normal (arg0));
- if (!target)
- target = gen_reg_rtx (Pmode);
- else
- target = force_reg (Pmode, target);
-
- emit_move_insn (target, op0);
-
if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
{
rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
icode = CODE_FOR_xpaclri;
emit_move_insn (lr, op0);
emit_insn (GEN_FCN (icode) ());
- emit_move_insn (target, lr);
+ return lr;
}
else
{
@@ -2122,20 +2127,18 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
emit_move_insn (x17_reg, op0);
emit_move_insn (x16_reg, op1);
emit_insn (GEN_FCN (icode) ());
- emit_move_insn (target, x17_reg);
+ return x17_reg;
}
- return target;
-
case AARCH64_JSCVT:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = force_reg (DFmode, expand_normal (arg0));
- if (!target)
- target = gen_reg_rtx (SImode);
- else
- target = force_reg (SImode, target);
- emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0));
- return target;
+ {
+ expand_operand ops[2];
+ create_output_operand (&ops[0], target, SImode);
+ op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ create_input_operand (&ops[1], op0, DFmode);
+ expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
+ return ops[0].value;
+ }
case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
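
(Annotation, not part of the commit: a minimal usage sketch of the FPCR built-ins whose expansion is reworked in the hunks above. The FZ-bit position below is an assumption taken from the architectural FPCR layout and is used purely for illustration.)

    /* Illustrative only: read-modify-write FPCR through the built-ins
       expanded by aarch64_expand_fpsr_fpcr_getter/_setter.  */
    unsigned int
    enable_flush_to_zero (void)
    {
      unsigned int fpcr = __builtin_aarch64_get_fpcr ();
      __builtin_aarch64_set_fpcr (fpcr | (1u << 24));  /* FPCR.FZ (assumed bit 24) */
      return __builtin_aarch64_get_fpcr ();
    }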
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f30ff35..3aa13f6 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -103,8 +103,11 @@ AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
@@ -133,11 +136,15 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
/* Qualcomm ('Q') cores. */
AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
+/* Armv8.5-A Architecture Processors. */
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1)
+
/* ARMv8-A big.LITTLE implementations. */
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 8257df9..ca08642 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -155,7 +155,7 @@ AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \
AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "")
/* Enabling/Disabling "rng" only changes "rng". */
-AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "")
+AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng")
/* Enabling/Disabling "memtag" only changes "memtag". */
AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index c7e828d..7a34c84 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -136,6 +136,25 @@ enum aarch64_addr_query_type {
ADDR_QUERY_ANY
};
+/* Enumerates values that can be arbitrarily mixed into a calculation
+ in order to make the result of the calculation unique to its use case.
+
+ AARCH64_SALT_SSP_SET
+ AARCH64_SALT_SSP_TEST
+ Used when calculating the address of the stack protection canary value.
+ There is a separate value for setting and testing the canary, meaning
+ that these two operations produce unique addresses: they are different
+ from each other, and from all other address calculations.
+
+ The main purpose of this is to prevent the SET address being spilled
+ to the stack and reloaded for the TEST, since that would give an
+ attacker the opportunity to change the address of the expected
+ canary value. */
+enum aarch64_salt_type {
+ AARCH64_SALT_SSP_SET,
+ AARCH64_SALT_SSP_TEST
+};
+
/* A set of tuning parameters contains references to size and time
cost models and vectors for address cost calculations, register
move costs and memory move costs. */
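
(Annotation, not part of the commit: a hypothetical C sketch of the salting idea described in the new comment above. The salt has no runtime effect; it exists only to make the SET and TEST address expressions syntactically distinct, so the compiler cannot unify them into one shared, spillable temporary.)

    /* Hypothetical illustration, not the GCC implementation.  */
    enum salt_type { SALT_SET, SALT_TEST };

    extern char __stack_chk_guard;

    static inline void *
    canary_address (enum salt_type salt)
    {
      (void) salt;  /* distinguishes the two uses; never changes the value */
      return &__stack_chk_guard;
    }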
@@ -608,9 +627,9 @@ opt_machine_mode aarch64_ptrue_all_mode (rtx);
rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
void aarch64_expand_mov_immediate (rtx, rtx);
+rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
rtx aarch64_ptrue_reg (machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
-bool aarch64_sve_pred_dominates_p (rtx *, rtx);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d1b2110..5bc596d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -45,8 +45,8 @@
BUILTIN_VDC (COMBINE, combine, 0, ALL)
VAR1 (COMBINEP, combine, 0, ALL, di)
- BUILTIN_VB (BINOP, pmul, 0, ALL)
- BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, ALL)
+ BUILTIN_VB (BINOP, pmul, 0, NONE)
+ BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, FP)
BUILTIN_VHSDF_DF (UNOP, sqrt, 2, ALL)
BUILTIN_VD_BHSI (BINOP, addp, 0, NONE)
VAR1 (UNOP, addp, 0, NONE, di)
@@ -70,26 +70,26 @@
BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, ALL)
/* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
- BUILTIN_VDC (GETREG, get_dregoi, 0, ALL)
- BUILTIN_VDC (GETREG, get_dregci, 0, ALL)
- BUILTIN_VDC (GETREG, get_dregxi, 0, ALL)
- VAR1 (GETREGP, get_dregoi, 0, ALL, di)
- VAR1 (GETREGP, get_dregci, 0, ALL, di)
- VAR1 (GETREGP, get_dregxi, 0, ALL, di)
+ BUILTIN_VDC (GETREG, get_dregoi, 0, AUTO_FP)
+ BUILTIN_VDC (GETREG, get_dregci, 0, AUTO_FP)
+ BUILTIN_VDC (GETREG, get_dregxi, 0, AUTO_FP)
+ VAR1 (GETREGP, get_dregoi, 0, AUTO_FP, di)
+ VAR1 (GETREGP, get_dregci, 0, AUTO_FP, di)
+ VAR1 (GETREGP, get_dregxi, 0, AUTO_FP, di)
/* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
- BUILTIN_VQ (GETREG, get_qregoi, 0, ALL)
- BUILTIN_VQ (GETREG, get_qregci, 0, ALL)
- BUILTIN_VQ (GETREG, get_qregxi, 0, ALL)
- VAR1 (GETREGP, get_qregoi, 0, ALL, v2di)
- VAR1 (GETREGP, get_qregci, 0, ALL, v2di)
- VAR1 (GETREGP, get_qregxi, 0, ALL, v2di)
+ BUILTIN_VQ (GETREG, get_qregoi, 0, AUTO_FP)
+ BUILTIN_VQ (GETREG, get_qregci, 0, AUTO_FP)
+ BUILTIN_VQ (GETREG, get_qregxi, 0, AUTO_FP)
+ VAR1 (GETREGP, get_qregoi, 0, AUTO_FP, v2di)
+ VAR1 (GETREGP, get_qregci, 0, AUTO_FP, v2di)
+ VAR1 (GETREGP, get_qregxi, 0, AUTO_FP, v2di)
/* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
- BUILTIN_VQ (SETREG, set_qregoi, 0, ALL)
- BUILTIN_VQ (SETREG, set_qregci, 0, ALL)
- BUILTIN_VQ (SETREG, set_qregxi, 0, ALL)
- VAR1 (SETREGP, set_qregoi, 0, ALL, v2di)
- VAR1 (SETREGP, set_qregci, 0, ALL, v2di)
- VAR1 (SETREGP, set_qregxi, 0, ALL, v2di)
+ BUILTIN_VQ (SETREG, set_qregoi, 0, AUTO_FP)
+ BUILTIN_VQ (SETREG, set_qregci, 0, AUTO_FP)
+ BUILTIN_VQ (SETREG, set_qregxi, 0, AUTO_FP)
+ VAR1 (SETREGP, set_qregoi, 0, AUTO_FP, v2di)
+ VAR1 (SETREGP, set_qregci, 0, AUTO_FP, v2di)
+ VAR1 (SETREGP, set_qregxi, 0, AUTO_FP, v2di)
/* Implemented by aarch64_ld1x2<VQ:mode>. */
BUILTIN_VQ (LOADSTRUCT, ld1x2, 0, ALL)
/* Implemented by aarch64_ld1x2<VDC:mode>. */
@@ -159,7 +159,7 @@
BUILTIN_VQN (TERNOP, raddhn2, 0, NONE)
BUILTIN_VQN (TERNOP, rsubhn2, 0, NONE)
- BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0, ALL)
+ BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, ALL)
/* Implemented by aarch64_<sur>qmovn<mode>. */
BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, ALL)
BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, ALL)
@@ -189,11 +189,11 @@
BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0, ALL)
BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0, ALL)
- BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, ALL)
- BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, ALL)
+ BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, NONE)
+ BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, NONE)
- BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10, ALL)
- BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10, ALL)
+ BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10, NONE)
+ BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10, NONE)
BUILTIN_VD_HSI (TERNOP_LANE, vec_smult_lane_, 0, ALL)
BUILTIN_VD_HSI (QUADOP_LANE, vec_smlal_lane_, 0, ALL)
@@ -246,10 +246,10 @@
BUILTIN_VHSDF (BINOP, fcadd270, 0, FP)
/* Implemented by aarch64_fcmla{_lane}{q}<rot><mode>. */
- BUILTIN_VHSDF (TERNOP, fcmla0, 0, ALL)
- BUILTIN_VHSDF (TERNOP, fcmla90, 0, ALL)
- BUILTIN_VHSDF (TERNOP, fcmla180, 0, ALL)
- BUILTIN_VHSDF (TERNOP, fcmla270, 0, ALL)
+ BUILTIN_VHSDF (TERNOP, fcmla0, 0, FP)
+ BUILTIN_VHSDF (TERNOP, fcmla90, 0, FP)
+ BUILTIN_VHSDF (TERNOP, fcmla180, 0, FP)
+ BUILTIN_VHSDF (TERNOP, fcmla270, 0, FP)
BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0, ALL)
BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0, ALL)
BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0, ALL)
@@ -338,12 +338,11 @@
BUILTIN_VHSDF (UNOP, nearbyint, 2, FP)
BUILTIN_VHSDF (UNOP, rint, 2, FP)
BUILTIN_VHSDF (UNOP, round, 2, FP)
- BUILTIN_VHSDF_DF (UNOP, frintn, 2, FP)
+ BUILTIN_VHSDF_HSDF (UNOP, frintn, 2, FP)
VAR1 (UNOP, btrunc, 2, FP, hf)
VAR1 (UNOP, ceil, 2, FP, hf)
VAR1 (UNOP, floor, 2, FP, hf)
- VAR1 (UNOP, frintn, 2, FP, hf)
VAR1 (UNOP, nearbyint, 2, FP, hf)
VAR1 (UNOP, rint, 2, FP, hf)
VAR1 (UNOP, round, 2, FP, hf)
@@ -535,8 +534,8 @@
VAR1 (TERNOPU, crypto_sha256su1, 0, ALL, v4si)
/* Implemented by aarch64_crypto_pmull<mode>. */
- VAR1 (BINOPP, crypto_pmull, 0, ALL, di)
- VAR1 (BINOPP, crypto_pmull, 0, ALL, v2di)
+ VAR1 (BINOPP, crypto_pmull, 0, NONE, di)
+ VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di)
/* Implemented by aarch64_tbl3<mode>. */
VAR1 (BINOP, tbl3, 0, ALL, v8qi)
@@ -667,15 +666,15 @@
BUILTIN_VQ_I (TERNOP, bcaxq, 4, ALL)
/* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>. */
- VAR1 (TERNOP, fmlal_low, 0, ALL, v2sf)
- VAR1 (TERNOP, fmlsl_low, 0, ALL, v2sf)
- VAR1 (TERNOP, fmlalq_low, 0, ALL, v4sf)
- VAR1 (TERNOP, fmlslq_low, 0, ALL, v4sf)
+ VAR1 (TERNOP, fmlal_low, 0, FP, v2sf)
+ VAR1 (TERNOP, fmlsl_low, 0, FP, v2sf)
+ VAR1 (TERNOP, fmlalq_low, 0, FP, v4sf)
+ VAR1 (TERNOP, fmlslq_low, 0, FP, v4sf)
/* Implemented by aarch64_fml<f16mac1>l<f16quad>_high<mode>. */
- VAR1 (TERNOP, fmlal_high, 0, ALL, v2sf)
- VAR1 (TERNOP, fmlsl_high, 0, ALL, v2sf)
- VAR1 (TERNOP, fmlalq_high, 0, ALL, v4sf)
- VAR1 (TERNOP, fmlslq_high, 0, ALL, v4sf)
+ VAR1 (TERNOP, fmlal_high, 0, FP, v2sf)
+ VAR1 (TERNOP, fmlsl_high, 0, FP, v2sf)
+ VAR1 (TERNOP, fmlalq_high, 0, FP, v4sf)
+ VAR1 (TERNOP, fmlslq_high, 0, FP, v4sf)
/* Implemented by aarch64_fml<f16mac1>l_lane_lowv2sf. */
VAR1 (QUADOP_LANE, fmlal_lane_low, 0, ALL, v2sf)
VAR1 (QUADOP_LANE, fmlsl_lane_low, 0, ALL, v2sf)
@@ -713,20 +712,20 @@
VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, ALL, v2sf, v4sf)
/* Implemented by aarch64_bfmmlaqv4sf */
- VAR1 (TERNOP, bfmmlaq, 0, ALL, v4sf)
+ VAR1 (TERNOP, bfmmlaq, 0, AUTO_FP, v4sf)
/* Implemented by aarch64_bfmlal<bt>{_lane{q}}v4sf */
- VAR1 (TERNOP, bfmlalb, 0, ALL, v4sf)
- VAR1 (TERNOP, bfmlalt, 0, ALL, v4sf)
+ VAR1 (TERNOP, bfmlalb, 0, FP, v4sf)
+ VAR1 (TERNOP, bfmlalt, 0, FP, v4sf)
VAR1 (QUADOP_LANE, bfmlalb_lane, 0, ALL, v4sf)
VAR1 (QUADOP_LANE, bfmlalt_lane, 0, ALL, v4sf)
VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, ALL, v4sf)
VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, ALL, v4sf)
/* Implemented by aarch64_simd_<sur>mmlav16qi. */
- VAR1 (TERNOP, simd_smmla, 0, ALL, v16qi)
- VAR1 (TERNOPU, simd_ummla, 0, ALL, v16qi)
- VAR1 (TERNOP_SSUS, simd_usmmla, 0, ALL, v16qi)
+ VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi)
+ VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi)
+ VAR1 (TERNOP_SSUS, simd_usmmla, 0, NONE, v16qi)
/* Implemented by aarch64_bfcvtn{q}{2}<mode> */
VAR1 (UNOP, bfcvtn, 0, ALL, v4bf)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index cd79aba..31a8c5a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -464,6 +464,95 @@
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
;;
+;; For (3) and (4), we combine these operations with an UNSPEC_SEL
+;; that selects between the result of the FP operation and the "else"
+;; value. (This else value is a merge input for _m ACLE functions
+;; and zero for _z ACLE functions.) The outer pattern then has the form:
+;;
+;; (unspec [pred fp_operation else_value] UNSPEC_SEL)
+;;
+;; This means that the patterns for (3) and (4) have two predicates:
+;; one for the FP operation itself and one for the UNSPEC_SEL.
+;; This pattern is equivalent to the result of combining an instance
+;; of (1) or (2) with a separate vcond instruction, so these patterns
+;; are useful as combine targets too.
+;;
+;; However, in the combine case, the instructions that we want to
+;; combine might use different predicates. Then:
+;;
+;; - Some of the active lanes of the FP operation might be discarded
+;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes,
+;; even for SVE_STRICT_GP, since the operations on those lanes are
+;; effectively dead code.
+;;
+;; - Some of the inactive lanes of the FP operation might be selected
+;; by the UNSPEC_SEL, giving unspecified values for those lanes.
+;; SVE_RELAXED_GP lets us extend the FP operation to cover these
+;; extra lanes, but SVE_STRICT_GP does not.
+;;
+;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
+;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
+;; This typically leads to patterns like:
+;;
+;; (unspec [(match_operand 1 "register_operand" "Upl")
+;; (unspec [(match_operand N)
+;; (const_int SVE_RELAXED_GP)
+;; ...]
+;; UNSPEC_COND_<MNEMONIC>)
+;; ...])
+;;
+;; where operand N is allowed to be anything. These instructions then
+;; have rewrite rules to replace operand N with operand 1, which gives the
+;; instructions a canonical form and means that the original operand N is
+;; not kept live unnecessarily.
+;;
+;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
+;; a subset of the FP operation predicate. This case isn't interesting
+;; for FP operations that have an all-true predicate, since such operations
+;; use SVE_RELAXED_GP instead. And it is not possible for instruction
+;; conditions to track the subset relationship for arbitrary registers.
+;; So in practice, the only useful case for SVE_STRICT_GP is the one
+;; in which the predicates match:
+;;
+;; (unspec [(match_operand 1 "register_operand" "Upl")
+;; (unspec [(match_dup 1)
+;; (const_int SVE_STRICT_GP)
+;; ...]
+;; UNSPEC_COND_<MNEMONIC>)
+;; ...])
+;;
+;; This pattern would also be correct for SVE_RELAXED_GP, but it would
+;; be redundant with the one above. However, if the combine pattern
+;; has multiple FP operations, using a match_operand allows combinations
+;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
+;; that the predicates are the same:
+;;
+;; (unspec [(match_operand 1 "register_operand" "Upl")
+;; (...
+;; (unspec [(match_dup 1)
+;; (match_operand:SI N "aarch64_sve_gp_strictness")
+;; ...]
+;; UNSPEC_COND_<MNEMONIC1>)
+;; (unspec [(match_dup 1)
+;; (match_operand:SI M "aarch64_sve_gp_strictness")
+;; ...]
+;; UNSPEC_COND_<MNEMONIC2>) ...)
+;; ...])
+;;
+;; The fully-relaxed version of this pattern is:
+;;
+;; (unspec [(match_operand 1 "register_operand" "Upl")
+;; (...
+;; (unspec [(match_operand:SI N)
+;; (const_int SVE_RELAXED_GP)
+;; ...]
+;; UNSPEC_COND_<MNEMONIC1>)
+;; (unspec [(match_operand:SI M)
+;; (const_int SVE_RELAXED_GP)
+;; ...]
+;; UNSPEC_COND_<MNEMONIC2>) ...)
+;; ...])
+;;
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
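
(Annotation, not part of the commit: the _m/_z distinction discussed in the comment above corresponds to ACLE source like the sketch below; svabs is just one member of the SVE_COND_FP_UNARY family.)

    #include <arm_sve.h>

    /* Illustrative only: _m merges inactive lanes from FALLBACK, _z zeros
       them.  Both lower to an FP operation wrapped in UNSPEC_SEL, which
       the relaxed/strict patterns that follow are designed to match.  */
    svfloat32_t
    abs_merge (svbool_t pg, svfloat32_t fallback, svfloat32_t x)
    {
      return svabs_f32_m (fallback, pg, x);
    }

    svfloat32_t
    abs_zero (svbool_t pg, svfloat32_t x)
    {
      return svabs_f32_z (pg, x);
    }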
@@ -3304,18 +3393,18 @@
)
;; Predicated floating-point unary arithmetic, merging with the first input.
-(define_insn_and_rewrite "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 3)
- (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
SVE_COND_FP_UNARY)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
+ "TARGET_SVE"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
@@ -3326,6 +3415,24 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
+ SVE_COND_FP_UNARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
@@ -3334,20 +3441,18 @@
;; which is handled above rather than here. Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
SVE_COND_FP_UNARY)
(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[3])
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
@@ -3359,6 +3464,25 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+(define_insn "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+ SVE_COND_FP_UNARY)
+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Square root
;; -------------------------------------------------------------------------
@@ -4649,19 +4773,19 @@
;; Predicated floating-point binary operations that take an integer as their
;; second operand, with inactive lanes coming from the first operand.
-(define_insn_and_rewrite "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
SVE_COND_FP_BINARY_INT)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
@@ -4672,24 +4796,41 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
+ SVE_COND_FP_BINARY_INT)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point binary operations that take an integer as
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
SVE_COND_FP_BINARY_INT)
(match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -4713,6 +4854,35 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
+ (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
+ SVE_COND_FP_BINARY_INT)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
@@ -4813,19 +4983,19 @@
)
;; Predicated floating-point operations, merging with the first input.
-(define_insn_and_rewrite "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
@@ -4836,20 +5006,39 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Same for operations that take a 1-bit constant.
-(define_insn_and_rewrite "*cond_<optab><mode>_2_const"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
@@ -4860,20 +5049,39 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_const_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ SVE_COND_FP_BINARY_I1)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point operations, merging with the second input.
-(define_insn_and_rewrite "*cond_<optab><mode>_3"
+(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
@@ -4884,14 +5092,33 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_3_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point operations, merging with an independent value.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
SVE_COND_FP_BINARY)
@@ -4899,8 +5126,7 @@
UNSPEC_SEL))]
"TARGET_SVE
&& !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ && !rtx_equal_p (operands[3], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
@@ -4925,22 +5151,52 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; Same for operations that take a 1-bit constant.
-(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
(match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
@@ -4963,6 +5219,34 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
+ (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ SVE_COND_FP_BINARY_I1)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Addition
;; -------------------------------------------------------------------------
@@ -5001,19 +5285,19 @@
;; Predicated floating-point addition of a constant, merging with the
;; first input.
-(define_insn_and_rewrite "*cond_add<mode>_2_const"
+(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
(match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
@@ -5026,23 +5310,42 @@
[(set_attr "movprfx" "*,*,yes,yes")]
)
+(define_insn "*cond_add<mode>_2_const_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
+ UNSPEC_COND_FADD)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
+ [(set_attr "movprfx" "*,*,yes,yes")]
+)
+
;; Predicated floating-point addition of a constant, merging with an
;; independent value.
-(define_insn_and_rewrite "*cond_add<mode>_any_const"
+(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
(match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
UNSPEC_COND_FADD)
(match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
@@ -5068,6 +5371,37 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
+ UNSPEC_COND_FADD)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ #
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; Register merging forms are handled through SVE_COND_FP_BINARY.
;; -------------------------------------------------------------------------
@@ -5110,19 +5444,19 @@
)
;; Predicated FCADD, merging with the first input.
-(define_insn_and_rewrite "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
SVE_COND_FCADD)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
@@ -5133,22 +5467,39 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ SVE_COND_FCADD)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated FCADD, merging with an independent value.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
SVE_COND_FCADD)
(match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
@@ -5172,6 +5523,35 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
+ SVE_COND_FCADD)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Subtraction
;; -------------------------------------------------------------------------
@@ -5209,19 +5589,19 @@
;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
-(define_insn_and_rewrite "*cond_sub<mode>_3_const"
+(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
(match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE"
"@
fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
@@ -5232,12 +5612,28 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_sub<mode>_3_const_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
+ UNSPEC_COND_FSUB)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
+ movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
-;;
-;; The subtraction predicate and the merge predicate are allowed to be
-;; different.
-(define_insn_and_rewrite "*cond_sub<mode>_relaxed_const"
+(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -5272,11 +5668,7 @@
[(set_attr "movprfx" "yes")]
)
-;; Predicated floating-point subtraction from a constant, merging with an
-;; independent value.
-;;
-;; The subtraction predicate and the merge predicate must be the same.
-(define_insn_and_rewrite "*cond_sub<mode>_strict_const"
+(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -5329,19 +5721,19 @@
)
;; Predicated floating-point absolute difference.
-(define_insn_and_rewrite "*aarch64_pred_abd<mode>"
+(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE"
"@
fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
@@ -5352,6 +5744,25 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*aarch64_pred_abd<mode>_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ UNSPEC_COND_FSUB)]
+ UNSPEC_COND_FABS))]
+ "TARGET_SVE"
+ "@
+ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
(define_expand "@aarch64_cond_abd<mode>"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
(unspec:SVE_FULL_F
@@ -5376,82 +5787,124 @@
;; Predicated floating-point absolute difference, merging with the first
;; input.
-(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
+(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(unspec:SVE_FULL_F
- [(match_operand 6)
- (match_operand:SI 7 "aarch64_sve_gp_strictness")
+ [(match_operand 5)
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
- && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
+ "TARGET_SVE"
"@
fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
"&& (!rtx_equal_p (operands[1], operands[4])
- || !rtx_equal_p (operands[1], operands[6]))"
+ || !rtx_equal_p (operands[1], operands[5]))"
{
operands[4] = copy_rtx (operands[1]);
- operands[6] = copy_rtx (operands[1]);
+ operands[5] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*aarch64_cond_abd<mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
+ UNSPEC_COND_FSUB)]
+ UNSPEC_COND_FABS)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point absolute difference, merging with the second
;; input.
-(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
+(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(unspec:SVE_FULL_F
- [(match_operand 6)
- (match_operand:SI 7 "aarch64_sve_gp_strictness")
+ [(match_operand 5)
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS)
(match_dup 3)]
UNSPEC_SEL))]
- "TARGET_SVE
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
- && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
+ "TARGET_SVE"
"@
fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
"&& (!rtx_equal_p (operands[1], operands[4])
- || !rtx_equal_p (operands[1], operands[6]))"
+ || !rtx_equal_p (operands[1], operands[5]))"
{
operands[4] = copy_rtx (operands[1]);
- operands[6] = copy_rtx (operands[1]);
+ operands[5] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*aarch64_cond_abd<mode>_3_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
+ UNSPEC_COND_FSUB)]
+ UNSPEC_COND_FABS)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point absolute difference, merging with an
;; independent value.
-(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
+(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(unspec:SVE_FULL_F
- [(match_operand 7)
- (match_operand:SI 8 "aarch64_sve_gp_strictness")
+ [(match_operand 6)
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
UNSPEC_COND_FSUB)]
@@ -5460,9 +5913,7 @@
UNSPEC_SEL))]
"TARGET_SVE
&& !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
- && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
+ && !rtx_equal_p (operands[3], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
@@ -5472,18 +5923,18 @@
"&& 1"
{
if (reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4]))
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4]))
{
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
operands[4], operands[1]));
operands[4] = operands[3] = operands[0];
}
else if (!rtx_equal_p (operands[1], operands[5])
- || !rtx_equal_p (operands[1], operands[7]))
+ || !rtx_equal_p (operands[1], operands[6]))
{
operands[5] = copy_rtx (operands[1]);
- operands[7] = copy_rtx (operands[1]);
+ operands[6] = copy_rtx (operands[1]);
}
else
FAIL;
@@ -5491,6 +5942,42 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
+ UNSPEC_COND_FSUB)]
+ UNSPEC_COND_FABS)
+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
+ operands[4], operands[1]));
+ operands[4] = operands[3] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] Multiplication
;; -------------------------------------------------------------------------
@@ -6416,20 +6903,20 @@
;; Predicated floating-point ternary operations, merging with the
;; first input.
-(define_insn_and_rewrite "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")
(match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE"
"@
<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
@@ -6440,22 +6927,42 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_2_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point ternary operations, merging with the
;; third input.
-(define_insn_and_rewrite "*cond_<optab><mode>_4"
+(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")
(match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE"
"@
<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
@@ -6466,15 +6973,35 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_4_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 4)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated floating-point ternary operations, merging with an
;; independent value.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 6)
- (match_operand:SI 7 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
(match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
@@ -6484,8 +7011,7 @@
"TARGET_SVE
&& !rtx_equal_p (operands[2], operands[5])
&& !rtx_equal_p (operands[3], operands[5])
- && !rtx_equal_p (operands[4], operands[5])
- && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
+ && !rtx_equal_p (operands[4], operands[5])"
"@
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
@@ -6511,6 +7037,41 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
+ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[5])
+ && !rtx_equal_p (operands[3], operands[5])
+ && !rtx_equal_p (operands[4], operands[5])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[5], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[5])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+ operands[5], operands[1]));
+ operands[5] = operands[4] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
;; (fma ...) since target-independent code won't understand the indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
@@ -6572,20 +7133,20 @@
)
;; Predicated FCMLA, merging with the third input.
-(define_insn_and_rewrite "*cond_<optab><mode>_4"
+(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")
(match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
SVE_COND_FCMLA)
(match_dup 4)]
UNSPEC_SEL))]
- "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
+ "TARGET_SVE"
"@
fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
@@ -6596,23 +7157,41 @@
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "*cond_<optab><mode>_4_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
+ SVE_COND_FCMLA)
+ (match_dup 4)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; Predicated FCMLA, merging with an independent value.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
+(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 6)
- (match_operand:SI 7 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
(match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
SVE_COND_FCMLA)
(match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[4], operands[5])
- && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
+ "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
"@
movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
@@ -6636,6 +7215,36 @@
[(set_attr "movprfx" "yes")]
)
+(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
+ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
+ SVE_COND_FCMLA)
+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
+ movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
+ #"
+ "&& reload_completed
+ && register_operand (operands[5], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[5])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+ operands[5], operands[1]));
+ operands[5] = operands[4] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; Unpredicated FCMLA with indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
@@ -7328,34 +7937,52 @@
"TARGET_SVE"
)
-(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
+(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SI 4 "aarch64_sve_ptrue_flag")
(unspec:SVE_FULL_F
[(match_operand 5)
- (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w")]
UNSPEC_COND_FABS)
(unspec:SVE_FULL_F
- [(match_operand 7)
- (match_operand:SI 8 "aarch64_sve_gp_strictness")
+ [(match_operand 6)
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 3 "register_operand" "w")]
UNSPEC_COND_FABS)]
SVE_COND_FP_ABS_CMP))]
- "TARGET_SVE
- && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
- && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
+ "TARGET_SVE"
"fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
"&& (!rtx_equal_p (operands[1], operands[5])
- || !rtx_equal_p (operands[1], operands[7]))"
+ || !rtx_equal_p (operands[1], operands[6]))"
{
operands[5] = copy_rtx (operands[1]);
- operands[7] = copy_rtx (operands[1]);
+ operands[6] = copy_rtx (operands[1]);
}
)
+(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP))]
+ "TARGET_SVE"
+ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
@@ -7937,20 +8564,18 @@
;; the same register (despite having different modes). Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
-(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
(unspec:SVE_FULL_HSDI
[(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_FULL_HSDI
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
SVE_COND_FCVTI)
(match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
"@
fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
@@ -7962,6 +8587,25 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
+ (unspec:SVE_FULL_HSDI
+ [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_HSDI
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+ SVE_COND_FCVTI)
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
+ "@
+ fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
+ movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
+ movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
;; Predicated narrowing float-to-integer conversion with merging.
(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
@@ -8101,20 +8745,18 @@
;; the same register (despite having different modes). Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
-(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
+(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_FULL_F
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
SVE_COND_ICVTF)
(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE
- && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
"@
<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
@@ -8126,6 +8768,25 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
+ (unspec:SVE_FULL_F
+ [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
+ SVE_COND_ICVTF)
+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
+ "@
+ <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
+ movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
+ movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
;; Predicated widening integer-to-float conversion with merging.
(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
[(set (match_operand:VNx2DF_ONLY 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index e18b9fe..0cafd0b 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1890,18 +1890,18 @@
)
;; These instructions do not take MOVPRFX.
-(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_SDF
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
UNSPEC_SEL))]
- "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE2"
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
"&& !rtx_equal_p (operands[1], operands[4])"
{
@@ -1909,6 +1909,21 @@
}
)
+(define_insn "*cond_<sve_fp_op><mode>_strict"
+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_SDF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_FULL_SDF
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VNARROW> 2 "register_operand" "w")]
+ SVE2_COND_FP_UNARY_LONG)
+ (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
@@ -1963,20 +1978,18 @@
"TARGET_SVE2"
)
-(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any"
+(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
(unspec:VNx4SF_ONLY
[(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:VNx4SF_ONLY
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE2
- && !rtx_equal_p (operands[2], operands[3])
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
@@ -1988,6 +2001,25 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+(define_insn "*cond_<sve_fp_op><mode>_any_strict"
+ [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
+ (unspec:VNx4SF_ONLY
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx4SF_ONLY
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
+ SVE2_COND_FP_UNARY_NARROWB)
+ (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
@@ -2113,14 +2145,12 @@
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:<V_INT_EQUIV>
[(match_operand 4)
- (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
SVE2_COND_INT_UNARY_FP)
(match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE2
- && !rtx_equal_p (operands[2], operands[3])
- && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
+ "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
@@ -2132,6 +2162,25 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+(define_insn "*cond_<sve_fp_op><mode>_strict"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
+ (unspec:<V_INT_EQUIV>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:<V_INT_EQUIV>
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
+ SVE2_COND_INT_UNARY_FP)
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 0e3239c..e060302 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b251f39..a8cc545 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1336,6 +1336,58 @@ static const struct tune_params neoversen1_tunings =
&generic_prefetch_tune
};
+static const struct tune_params neoversev1_tunings =
+{
+ &cortexa57_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &cortexa57_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width */
+ 4, /* memmov_cost */
+ 3, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
+ "32:16", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 2, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
+};
+
+static const struct tune_params neoversen2_tunings =
+{
+ &cortexa57_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &cortexa57_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_128, /* sve_width */
+ 4, /* memmov_cost */
+ 3, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
+ "32:16", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 2, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
+};
+
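These tables back the new core names registered in aarch64-tune.md
above; selecting them uses the usual -mcpu/-mtune spelling (invocations
illustrative):

    gcc -mcpu=neoverse-v1 ...   # picks neoversev1_tunings (SVE_256)
    gcc -mcpu=neoverse-n2 ...   # picks neoversen2_tunings (SVE_128)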
static const struct tune_params a64fx_tunings =
{
&generic_extra_costs,
@@ -1935,6 +1987,29 @@ aarch64_sve_abi (void)
return sve_abi;
}
+/* If X is an UNSPEC_SALT_ADDR expression, return the address that it
+ wraps, otherwise return X itself. */
+
+static rtx
+strip_salt (rtx x)
+{
+ rtx search = x;
+ if (GET_CODE (search) == CONST)
+ search = XEXP (search, 0);
+ if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR)
+ x = XVECEXP (search, 0, 0);
+ return x;
+}
+
+/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the
+ expression. */
+
+static rtx
+strip_offset_and_salt (rtx addr, poly_int64 *offset)
+{
+ return strip_salt (strip_offset (addr, offset));
+}
+
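As a sketch of the shape these helpers unwrap (RTL below is
illustrative, not part of the patch), aarch64_stack_protect_canary_mem
further down builds

    (const (unspec [(symbol_ref "__stack_chk_guard")
                    (const_int <salt_type>)] UNSPEC_SALT_ADDR))

possibly wrapped in (plus ... (const_int N)).  strip_salt returns the
inner symbol_ref; strip_offset_and_salt additionally folds the offset
into *offset.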
/* Generate code to enable conditional branches in functions over 1 MiB. */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
@@ -2932,14 +3007,9 @@ static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
enum tls_model tls_kind = TLS_MODEL_NONE;
- if (GET_CODE (addr) == CONST)
- {
- poly_int64 addend;
- rtx sym = strip_offset (addr, &addend);
- if (GET_CODE (sym) == SYMBOL_REF)
- tls_kind = SYMBOL_REF_TLS_MODEL (sym);
- }
- else if (GET_CODE (addr) == SYMBOL_REF)
+ poly_int64 offset;
+ addr = strip_offset_and_salt (addr, &offset);
+ if (GET_CODE (addr) == SYMBOL_REF)
tls_kind = SYMBOL_REF_TLS_MODEL (addr);
return tls_kind;
@@ -3404,11 +3474,16 @@ aarch64_split_128bit_move (rtx dst, rtx src)
}
}
+/* Return true if we should split a move from 128-bit value SRC
+ to 128-bit register DEST. */
+
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
- return (! REG_P (src)
- || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
+ if (FP_REGNUM_P (REGNO (dst)))
+ return REG_P (src) && !FP_REGNUM_P (REGNO (src));
+ /* All moves to GPRs need to be split. */
+ return true;
}
/* Split a complex SIMD combine. */
@@ -3694,24 +3769,6 @@ aarch64_pfalse_reg (machine_mode mode)
return gen_lowpart (mode, reg);
}
-/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
- true, or alternatively if we know that the operation predicated by
- PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a
- aarch64_sve_gp_strictness operand that describes the operation
- predicated by PRED1[0]. */
-
-bool
-aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
-{
- machine_mode mode = GET_MODE (pred2);
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
- && mode == GET_MODE (pred1[0])
- && aarch64_sve_gp_strictness (pred1[1], SImode));
- return (pred1[0] == CONSTM1_RTX (mode)
- || INTVAL (pred1[1]) == SVE_RELAXED_GP
- || rtx_equal_p (pred1[0], pred2));
-}
-
/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
for it. PRED2[0] is the predicate for the instruction whose result
is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
@@ -5239,6 +5296,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
as_a <scalar_int_mode> (mode));
}
+/* Return the MEM rtx that provides the canary value that should be used
+ for stack-smashing protection. MODE is the mode of the memory.
+ For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable
+ (__stack_chk_guard), otherwise it has no useful value. SALT_TYPE
+ indicates whether the caller is performing a SET or a TEST operation. */
+
+rtx
+aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
+ aarch64_salt_type salt_type)
+{
+ rtx addr;
+ if (aarch64_stack_protector_guard == SSP_GLOBAL)
+ {
+ gcc_assert (MEM_P (decl_rtl));
+ addr = XEXP (decl_rtl, 0);
+ poly_int64 offset;
+ rtx base = strip_offset_and_salt (addr, &offset);
+ if (!SYMBOL_REF_P (base))
+ return decl_rtl;
+
+ rtvec v = gen_rtvec (2, base, GEN_INT (salt_type));
+ addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = plus_constant (Pmode, addr, offset);
+ }
+ else
+ {
+ /* Calculate the address from the system register. */
+ rtx salt = GEN_INT (salt_type);
+ addr = gen_reg_rtx (mode);
+ if (mode == DImode)
+ emit_insn (gen_reg_stack_protect_address_di (addr, salt));
+ else
+ {
+ emit_insn (gen_reg_stack_protect_address_si (addr, salt));
+ addr = convert_memory_address (Pmode, addr);
+ }
+ addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset);
+ }
+ return gen_rtx_MEM (mode, force_reg (Pmode, addr));
+}
+
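For the sysreg case, the returned MEM conceptually amounts to the
sequence below (register and offset are illustrative):

    mrs   x0, sp_el0      // reg_stack_protect_address_di
    add   x0, x0, #16     // aarch64_stack_protector_guard_offset
    ldr   x1, [x0]        // the MEM returned to the caller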
/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
that is known to contain PTRUE. */
@@ -8677,8 +8776,6 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
- rtx base, offset;
-
if (GET_CODE (x) == HIGH)
return true;
@@ -8688,10 +8785,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
if (GET_CODE (*iter) == CONST_POLY_INT)
return true;
- split_const (x, &base, &offset);
+ poly_int64 offset;
+ rtx base = strip_offset_and_salt (x, &offset);
if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
{
- if (aarch64_classify_symbol (base, INTVAL (offset))
+      /* We checked for CONST_POLY_INT offsets above.  */
+ if (aarch64_classify_symbol (base, offset.to_constant ())
!= SYMBOL_FORCE_TO_MEM)
return true;
else
@@ -9217,9 +9316,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
&& GET_MODE_SIZE (mode).is_constant (&const_size)
&& const_size >= 4)
{
- rtx sym, addend;
-
- split_const (x, &sym, &addend);
+ poly_int64 offset;
+ rtx sym = strip_offset_and_salt (x, &offset);
return ((GET_CODE (sym) == LABEL_REF
|| (GET_CODE (sym) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (sym)
@@ -9234,10 +9332,12 @@ aarch64_classify_address (struct aarch64_address_info *info,
if (allow_reg_index_p
&& aarch64_base_register_rtx_p (info->base, strict_p))
{
- rtx sym, offs;
- split_const (info->offset, &sym, &offs);
+ poly_int64 offset;
+ HOST_WIDE_INT const_offset;
+ rtx sym = strip_offset_and_salt (info->offset, &offset);
if (GET_CODE (sym) == SYMBOL_REF
- && (aarch64_classify_symbol (sym, INTVAL (offs))
+ && offset.is_constant (&const_offset)
+ && (aarch64_classify_symbol (sym, const_offset)
== SYMBOL_SMALL_ABSOLUTE))
{
/* The symbol and offset must be aligned to the access size. */
@@ -9263,7 +9363,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
if (known_eq (ref_size, 0))
ref_size = GET_MODE_SIZE (DImode);
- return (multiple_p (INTVAL (offs), ref_size)
+ return (multiple_p (const_offset, ref_size)
&& multiple_p (align / BITS_PER_UNIT, ref_size));
}
}
@@ -9295,9 +9395,8 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
bool
aarch64_symbolic_address_p (rtx x)
{
- rtx offset;
-
- split_const (x, &x, &offset);
+ poly_int64 offset;
+ x = strip_offset_and_salt (x, &offset);
return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}
@@ -10028,27 +10127,16 @@ aarch64_print_operand (FILE *f, rtx x, int code)
switch (code)
{
case 'c':
- switch (GET_CODE (x))
+ if (CONST_INT_P (x))
+ fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ else
{
- case CONST_INT:
- fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
- break;
-
- case SYMBOL_REF:
- output_addr_const (f, x);
- break;
-
- case CONST:
- if (GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
- {
- output_addr_const (f, x);
- break;
- }
- /* Fall through. */
-
- default:
- output_operand_lossage ("unsupported operand for code '%c'", code);
+ poly_int64 offset;
+ rtx base = strip_offset_and_salt (x, &offset);
+ if (SYMBOL_REF_P (base))
+ output_addr_const (f, x);
+ else
+ output_operand_lossage ("unsupported operand for code '%c'", code);
}
break;
@@ -10623,6 +10711,19 @@ aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
output_addr_const (f, x);
}
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+aarch64_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR)
+ {
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ return true;
+ }
+ return false;
+}
+
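Illustrative effect of this hook: asked to print the salted address
sketched earlier,

    (const (unspec [(symbol_ref "__stack_chk_guard") ...] UNSPEC_SALT_ADDR))

output_addr_const emits just "__stack_chk_guard", so the salt never
reaches the assembler.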
bool
aarch64_label_mentioned_p (rtx x)
{
@@ -15932,6 +16033,7 @@ aarch64_tls_symbol_p (rtx x)
if (! TARGET_HAVE_TLS)
return false;
+ x = strip_salt (x);
if (GET_CODE (x) != SYMBOL_REF)
return false;
@@ -15987,6 +16089,8 @@ aarch64_classify_tls_symbol (rtx x)
enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
{
+ x = strip_salt (x);
+
if (GET_CODE (x) == LABEL_REF)
{
switch (aarch64_cmodel)
@@ -16086,11 +16190,10 @@ aarch64_constant_address_p (rtx x)
bool
aarch64_legitimate_pic_operand_p (rtx x)
{
- if (GET_CODE (x) == SYMBOL_REF
- || (GET_CODE (x) == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
- return false;
+ poly_int64 offset;
+ x = strip_offset_and_salt (x, &offset);
+ if (GET_CODE (x) == SYMBOL_REF)
+ return false;
return true;
}
@@ -16136,7 +16239,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
/* If an offset is being added to something else, we need to allow the
base to be moved into the destination register, meaning that there
are no free temporaries for the offset. */
- x = strip_offset (x, &offset);
+ x = strip_offset_and_salt (x, &offset);
if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
return false;
@@ -18035,6 +18138,7 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
return aarch64_simd_valid_immediate (x, NULL);
}
+ x = strip_salt (x);
if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
return true;
@@ -23890,6 +23994,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra
+
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index dbc6b1d..78fe7c43 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,7 @@
UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
UNSPEC_LD1RO
+ UNSPEC_SALT_ADDR
])
(define_c_enum "unspecv" [
@@ -1360,13 +1361,14 @@
(define_insn "*movti_aarch64"
[(set (match_operand:TI 0
- "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m")
+ "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m")
(match_operand:TI 1
- "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))]
+ "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
"(register_operand (operands[0], TImode)
|| aarch64_reg_or_zero (operands[1], TImode))"
"@
#
+ movi\\t%0.2d, #0
#
#
mov\\t%0.16b, %1.16b
@@ -1375,11 +1377,11 @@
stp\\txzr, xzr, %0
ldr\\t%q0, %1
str\\t%q1, %0"
- [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
load_16,store_16,store_16,\
load_16,store_16")
- (set_attr "length" "8,8,8,4,4,4,4,4,4")
- (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
+ (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
+ (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
)
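Illustrative effect of the new "w <- Z" alternative: a TImode zero
headed for a SIMD register can now be materialized in one instruction,

    movi    v0.2d, #0

rather than being built in general registers and transferred.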
;; Split a TImode register-register or register-immediate move into
@@ -1510,9 +1512,9 @@
(define_insn "*movtf_aarch64"
[(set (match_operand:TF 0
- "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
+ "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
(match_operand:TF 1
- "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
+ "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
"TARGET_FLOAT && (register_operand (operands[0], TFmode)
|| aarch64_reg_or_fp_zero (operands[1], TFmode))"
"@
@@ -1535,7 +1537,7 @@
(define_split
[(set (match_operand:TF 0 "register_operand" "")
- (match_operand:TF 1 "aarch64_reg_or_imm" ""))]
+ (match_operand:TF 1 "nonmemory_operand" ""))]
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
[(const_int 0)]
{
@@ -6881,43 +6883,37 @@
DONE;
})
-;; Named patterns for stack smashing protection.
+;; Defined for -mstack-protector-guard=sysreg, which goes through this
+;; pattern rather than stack_protect_combined_set. Our implementation
+;; of the latter can handle both.
(define_expand "stack_protect_set"
[(match_operand 0 "memory_operand")
- (match_operand 1 "memory_operand")]
+ (match_operand 1 "")]
""
{
- machine_mode mode = GET_MODE (operands[0]);
- if (aarch64_stack_protector_guard != SSP_GLOBAL)
- {
- /* Generate access through the system register. */
- rtx tmp_reg = gen_reg_rtx (mode);
- if (mode == DImode)
- {
- emit_insn (gen_reg_stack_protect_address_di (tmp_reg));
- emit_insn (gen_adddi3 (tmp_reg, tmp_reg,
- GEN_INT (aarch64_stack_protector_guard_offset)));
- }
- else
- {
- emit_insn (gen_reg_stack_protect_address_si (tmp_reg));
- emit_insn (gen_addsi3 (tmp_reg, tmp_reg,
- GEN_INT (aarch64_stack_protector_guard_offset)));
+ emit_insn (gen_stack_protect_combined_set (operands[0], operands[1]));
+ DONE;
+})
- }
- operands[1] = gen_rtx_MEM (mode, tmp_reg);
- }
-
+(define_expand "stack_protect_combined_set"
+ [(match_operand 0 "memory_operand")
+ (match_operand 1 "")]
+ ""
+{
+ machine_mode mode = GET_MODE (operands[0]);
+ operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1],
+ AARCH64_SALT_SSP_SET);
emit_insn ((mode == DImode
? gen_stack_protect_set_di
: gen_stack_protect_set_si) (operands[0], operands[1]));
DONE;
})
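A usage sketch for the two guard styles this expander now serves
(command lines illustrative):

    gcc -fstack-protector-strong foo.c
        # SSP_GLOBAL: canary loaded from __stack_chk_guard
    gcc -fstack-protector-strong -mstack-protector-guard=sysreg \
        -mstack-protector-guard-reg=sp_el0 \
        -mstack-protector-guard-offset=16 foo.c
        # sysreg: canary read via the system register plus offset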
+;; Operand 1 is either AARCH64_SALT_SSP_SET or AARCH64_SALT_SSP_TEST.
(define_insn "reg_stack_protect_address_<mode>"
[(set (match_operand:PTR 0 "register_operand" "=r")
- (unspec:PTR [(const_int 0)]
- UNSPEC_SSP_SYSREG))]
+ (unspec:PTR [(match_operand 1 "const_int_operand")]
+ UNSPEC_SSP_SYSREG))]
"aarch64_stack_protector_guard != SSP_GLOBAL"
{
char buf[150];
@@ -6940,37 +6936,29 @@
[(set_attr "length" "12")
(set_attr "type" "multiple")])
+;; Defined for -mstack-protector-guard=sysreg, which goes through this
+;; pattern rather than stack_protect_combined_test. Our implementation
+;; of the latter can handle both.
(define_expand "stack_protect_test"
[(match_operand 0 "memory_operand")
- (match_operand 1 "memory_operand")
+ (match_operand 1 "")
(match_operand 2)]
""
{
- machine_mode mode = GET_MODE (operands[0]);
-
- if (aarch64_stack_protector_guard != SSP_GLOBAL)
- {
- /* Generate access through the system register. The
- sequence we want here is the access
- of the stack offset to come with
- mrs scratch_reg, <system_register>
- add scratch_reg, scratch_reg, :lo12:offset. */
- rtx tmp_reg = gen_reg_rtx (mode);
- if (mode == DImode)
- {
- emit_insn (gen_reg_stack_protect_address_di (tmp_reg));
- emit_insn (gen_adddi3 (tmp_reg, tmp_reg,
- GEN_INT (aarch64_stack_protector_guard_offset)));
- }
- else
- {
- emit_insn (gen_reg_stack_protect_address_si (tmp_reg));
- emit_insn (gen_addsi3 (tmp_reg, tmp_reg,
- GEN_INT (aarch64_stack_protector_guard_offset)));
+ emit_insn (gen_stack_protect_combined_test (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
- }
- operands[1] = gen_rtx_MEM (mode, tmp_reg);
- }
+(define_expand "stack_protect_combined_test"
+ [(match_operand 0 "memory_operand")
+ (match_operand 1 "")
+ (match_operand 2)]
+ ""
+{
+ machine_mode mode = GET_MODE (operands[0]);
+ operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1],
+ AARCH64_SALT_SSP_TEST);
emit_insn ((mode == DImode
? gen_stack_protect_test_di
: gen_stack_protect_test_si) (operands[0], operands[1]));
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 50f8b23..85c0d62 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6088,6 +6088,20 @@ vreinterpretq_u32_p128 (poly128_t __a)
return (uint32x4_t)__a;
}
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f64_p128 (poly128_t __a)
+{
+ return (float64x2_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f64 (float64x2_t __a)
+{
+ return (poly128_t) __a;
+}
+
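A minimal usage sketch for the new reinterpret pair (function name
illustrative):

    #include <arm_neon.h>

    poly128_t
    f64_bits (float64x2_t __v)
    {
      /* Pure bitcast; no data conversion takes place.  */
      return vreinterpretq_p128_f64 (__v);
    }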
/* vset_lane */
__extension__ extern __inline float16x4_t
@@ -12670,6 +12684,13 @@ vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
return (__a == __b);
}
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vceqq_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+ return (__a == __b);
+}
+
/* vceq - scalar. */
__extension__ extern __inline uint32_t
@@ -12779,6 +12800,13 @@ vceqz_u64 (uint64x1_t __a)
return (__a == __AARCH64_UINT64_C (0));
}
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vceqz_p64 (poly64x1_t __a)
+{
+ return (__a == __AARCH64_UINT64_C (0));
+}
+
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzq_f32 (float32x4_t __a)
@@ -12856,6 +12884,13 @@ vceqzq_u64 (uint64x2_t __a)
return (__a == __AARCH64_UINT64_C (0));
}
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vceqzq_p64 (poly64x2_t __a)
+{
+ return (__a == __AARCH64_UINT64_C (0));
+}
+
/* vceqz - scalar. */
__extension__ extern __inline uint32_t
@@ -14054,6 +14089,48 @@ vclsq_s32 (int32x4_t __a)
return __builtin_aarch64_clrsbv4si (__a);
}
+__extension__ extern __inline int8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u8 (uint8x8_t __a)
+{
+ return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a);
+}
+
+__extension__ extern __inline int16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u16 (uint16x4_t __a)
+{
+ return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u32 (uint32x2_t __a)
+{
+ return __builtin_aarch64_clrsbv2si ((int32x2_t) __a);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u8 (uint8x16_t __a)
+{
+ return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u16 (uint16x8_t __a)
+{
+ return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u32 (uint32x4_t __a)
+{
+ return __builtin_aarch64_clrsbv4si ((int32x4_t) __a);
+}
+
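Usage sketch for the new unsigned vcls variants (function name
illustrative):

    #include <arm_neon.h>

    int8x8_t
    sign_bits (uint8x8_t __x)
    {
      /* Per-lane count of leading sign bits, lanes treated as signed.  */
      return vcls_u8 (__x);
    }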
/* vclz. */
__extension__ extern __inline int8x8_t
@@ -15538,7 +15615,7 @@ vdupq_n_f64 (float64_t __a)
__extension__ extern __inline poly8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p8 (uint32_t __a)
+vdupq_n_p8 (poly8_t __a)
{
return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
__a, __a, __a, __a, __a, __a, __a, __a};
@@ -15546,21 +15623,21 @@ vdupq_n_p8 (uint32_t __a)
__extension__ extern __inline poly16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p16 (uint32_t __a)
+vdupq_n_p16 (poly16_t __a)
{
return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}
__extension__ extern __inline poly64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p64 (uint64_t __a)
+vdupq_n_p64 (poly64_t __a)
{
return (poly64x2_t) {__a, __a};
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s8 (int32_t __a)
+vdupq_n_s8 (int8_t __a)
{
return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
__a, __a, __a, __a, __a, __a, __a, __a};
@@ -15568,7 +15645,7 @@ vdupq_n_s8 (int32_t __a)
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s16 (int32_t __a)
+vdupq_n_s16 (int16_t __a)
{
return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}
@@ -15589,7 +15666,7 @@ vdupq_n_s64 (int64_t __a)
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u8 (uint32_t __a)
+vdupq_n_u8 (uint8_t __a)
{
return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
__a, __a, __a, __a, __a, __a, __a, __a};
@@ -15597,7 +15674,7 @@ vdupq_n_u8 (uint32_t __a)
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u16 (uint32_t __a)
+vdupq_n_u16 (uint16_t __a)
{
return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}
@@ -19613,6 +19690,13 @@ vld4q_p64 (const poly64_t * __a)
return ret;
}
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldrq_p128 (const poly128_t * __ptr)
+{
+ return *__ptr;
+}
+
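Usage sketch pairing this with vstrq_p128, added further down (function
name illustrative):

    #include <arm_neon.h>

    void
    copy_p128 (const poly128_t *__src, poly128_t *__dst)
    {
      /* Straight 128-bit polynomial load and store.  */
      vstrq_p128 (__dst, vldrq_p128 (__src));
    }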
/* vldn_dup */
__extension__ extern __inline int8x8x2_t
@@ -23962,42 +24046,42 @@ __extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_s16 (int16x8_t __a)
{
- return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
+ return __builtin_aarch64_sqmovunv8hi_us (__a);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_s32 (int32x4_t __a)
{
- return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
+ return __builtin_aarch64_sqmovunv4si_us (__a);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovun_s64 (int64x2_t __a)
{
- return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
+ return __builtin_aarch64_sqmovunv2di_us (__a);
}
-__extension__ extern __inline int8_t
+__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovunh_s16 (int16_t __a)
{
- return (int8_t) __builtin_aarch64_sqmovunhi (__a);
+ return __builtin_aarch64_sqmovunhi_us (__a);
}
-__extension__ extern __inline int16_t
+__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovuns_s32 (int32_t __a)
{
- return (int16_t) __builtin_aarch64_sqmovunsi (__a);
+ return __builtin_aarch64_sqmovunsi_us (__a);
}
-__extension__ extern __inline int32_t
+__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqmovund_s64 (int64_t __a)
{
- return (int32_t) __builtin_aarch64_sqmovundi (__a);
+ return __builtin_aarch64_sqmovundi_us (__a);
}
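/* Illustrative semantics for the signedness fixes above: SQXTUN narrows a
   signed input with *unsigned* saturation, so the results really are
   unsigned values, e.g.

     vqmovunh_s16 (-5)  == 0     // negative inputs clamp to 0
     vqmovunh_s16 (300) == 255   // large inputs clamp to UINT8_MAX
*/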
/* vqneg */
@@ -24253,28 +24337,28 @@ vqrshld_s64 (int64_t __a, int64_t __b)
__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqrshlb_u8 (uint8_t __a, uint8_t __b)
+vqrshlb_u8 (uint8_t __a, int8_t __b)
{
return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqrshlh_u16 (uint16_t __a, uint16_t __b)
+vqrshlh_u16 (uint16_t __a, int16_t __b)
{
return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqrshls_u32 (uint32_t __a, uint32_t __b)
+vqrshls_u32 (uint32_t __a, int32_t __b)
{
return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqrshld_u64 (uint64_t __a, uint64_t __b)
+vqrshld_u64 (uint64_t __a, int64_t __b)
{
return __builtin_aarch64_uqrshldi_uus (__a, __b);
}
@@ -24553,28 +24637,28 @@ vqshld_s64 (int64_t __a, int64_t __b)
__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqshlb_u8 (uint8_t __a, uint8_t __b)
+vqshlb_u8 (uint8_t __a, int8_t __b)
{
return __builtin_aarch64_uqshlqi_uus (__a, __b);
}
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqshlh_u16 (uint16_t __a, uint16_t __b)
+vqshlh_u16 (uint16_t __a, int16_t __b)
{
return __builtin_aarch64_uqshlhi_uus (__a, __b);
}
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqshls_u32 (uint32_t __a, uint32_t __b)
+vqshls_u32 (uint32_t __a, int32_t __b)
{
return __builtin_aarch64_uqshlsi_uus (__a, __b);
}
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqshld_u64 (uint64_t __a, uint64_t __b)
+vqshld_u64 (uint64_t __a, int64_t __b)
{
return __builtin_aarch64_uqshldi_uus (__a, __b);
}
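/* Illustrative: for the UQSHL/UQRSHL register forms the element being
   shifted is unsigned but the shift count is *signed* -- a negative count
   shifts right -- which is why the second parameters above become int*_t:

     vqshld_u64 (16, 2)  == 64   // left shift by 2
     vqshld_u64 (16, -2) == 4    // negative count: right shift by 2
*/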
@@ -26003,6 +26087,13 @@ vrndmq_f64 (float64x2_t __a)
/* vrndn */
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrndns_f32 (float32_t __a)
+{
+ return __builtin_aarch64_frintnsf (__a);
+}
+
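/* Illustrative: vrndns_f32 is the scalar form of FRINTN, rounding to
   nearest with ties to even:

     vrndns_f32 (0.5f) == 0.0f
     vrndns_f32 (1.5f) == 2.0f
*/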
__extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndn_f32 (float32x2_t __a)
@@ -26908,7 +26999,7 @@ vshld_s64 (int64_t __a, int64_t __b)
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vshld_u64 (uint64_t __a, uint64_t __b)
+vshld_u64 (uint64_t __a, int64_t __b)
{
return __builtin_aarch64_ushldi_uus (__a, __b);
}
@@ -30104,6 +30195,13 @@ vst4q_p64 (poly64_t * __a, poly64x2x4_t __val)
__builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstrq_p128 (poly128_t * __ptr, poly128_t __val)
+{
+ *__ptr = __val;
+}
+
/* vsub */
__extension__ extern __inline int64_t
@@ -30491,6 +30589,17 @@ vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
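/* Illustrative: for two-element vectors TRN1 selects the even-indexed lane
   from each input, so vtrn1q_p64 (a, b) yields {a[0], b[0]}; the vtrn2q_p64
   variant added below yields {a[1], b[1]}.  The __AARCH64EB__ masks express
   the same lane choice under big-endian lane numbering.  */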
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -30761,6 +30870,18 @@ vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
#endif
}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
__extension__ extern __inline float16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtrn_f16 (float16x4_t __a, float16x4_t __b)
@@ -31407,6 +31528,17 @@ vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vuzp2_f16 (float16x4_t __a, float16x4_t __b)
@@ -31666,6 +31798,17 @@ vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
__INTERLEAVE_LIST (uzp)
/* vzip */
@@ -31934,6 +32077,17 @@ vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif
+}
+
__extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vzip2_f16 (float16x4_t __a, float16x4_t __b)
@@ -32198,6 +32352,17 @@ vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
#endif
}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else
+ return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif
+}
+
__INTERLEAVE_LIST (zip)
#undef __INTERLEAVE_LIST
@@ -35659,6 +35824,55 @@ vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
#pragma GCC pop_options
+__extension__ extern __inline poly8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vadd_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vadd_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vadd_p64 (poly64x1_t __a, poly64x1_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vaddq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vaddq_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vaddq_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+ return __a ^ __b;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vaddq_p128 (poly128_t __a, poly128_t __b)
+{
+ return __a ^ __b;
+}
+
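/* Illustrative: addition of polynomials over GF(2) is carry-less, so all
   of the vadd_p* intrinsics above reduce to bitwise XOR:

     poly8x8_t r = vadd_p8 (a, b);   // r[i] == (poly8_t) (a[i] ^ b[i])
*/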
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 33e8015..db505a4 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -811,23 +811,23 @@ arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
static enum arm_type_qualifiers
arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none};
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none};
#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers)
static enum arm_type_qualifiers
arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned};
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned};
#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers)
static enum arm_type_qualifiers
arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_unsigned, qualifier_const,
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
qualifier_none, qualifier_unsigned};
#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
static enum arm_type_qualifiers
arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_unsigned, qualifier_const,
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
qualifier_unsigned, qualifier_unsigned};
#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index c98f8ed..8c61ad0 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -135,10 +135,6 @@ define feature armv8_1m_main
# Floating point and Neon extensions.
# VFPv1 is not supported in GCC.
-# This feature bit is enabled for all VFP, MVE and
-# MVE with floating point extensions.
-define feature vfp_base
-
# Vector floating point v2.
define feature vfpv2
@@ -251,7 +247,7 @@ define fgroup ALL_SIMD ALL_SIMD_INTERNAL ALL_SIMD_EXTERNAL
# List of all FPU bits to strip out if -mfpu is used to override the
# default. fp16 is deliberately missing from this list.
-define fgroup ALL_FPU_INTERNAL vfp_base vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL
+define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL
# Similarly, but including fp16 and other extensions that aren't part of
# -mfpu support.
define fgroup ALL_FPU_EXTERNAL fp16 bf16
@@ -296,11 +292,11 @@ define fgroup ARMv8r ARMv8a
define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main
# Useful combinations.
-define fgroup VFPv2 vfp_base vfpv2
+define fgroup VFPv2 vfpv2
define fgroup VFPv3 VFPv2 vfpv3
define fgroup VFPv4 VFPv3 vfpv4 fp16conv
define fgroup FPv5 VFPv4 fpv5
-define fgroup MVE mve vfp_base armv7em
+define fgroup MVE mve armv7em
define fgroup MVE_FP MVE FPv5 fp16 mve_float
define fgroup FP_DBL fp_dbl
@@ -310,6 +306,18 @@ define fgroup NEON FP_D32 neon
define fgroup CRYPTO NEON crypto
define fgroup DOTPROD NEON dotprod
+# Implied feature bits. These are for non-named features shared between fgroups.
+# A shared feature f belonging to fgroups A and B will be erroneously removed
+# when A and B are both enabled by default and A is then disabled by a removal
+# flag, even though B still requires f.
+# To ensure that f is retained, we must add such bits to the ISA after
+# processing the removal flags. This is implemented by 'implied bits':
+# define implied <name> [<feature-or-fgroup>]+
+# This indicates that, if any of the listed features are enabled, or if any
+# member of a listed fgroup is enabled, then <name> will be implicitly enabled.
+
+# Enabled for all VFP, MVE and MVE with floating point extensions.
+define implied vfp_base MVE MVE_FP ALL_FP
+
# List of all quirk bits to strip out when comparing CPU features with
# architectures.
# xscale isn't really a 'quirk', but it isn't an architecture either and we
@@ -1447,6 +1455,39 @@ begin cpu cortex-a77
part d0d
end cpu cortex-a77
+begin cpu cortex-a78
+ cname cortexa78
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv8.2-a+fp16+dotprod
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part d41
+end cpu cortex-a78
+
+begin cpu cortex-a78ae
+ cname cortexa78ae
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv8.2-a+fp16+dotprod
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part d42
+end cpu cortex-a78ae
+
+begin cpu cortex-x1
+ cname cortexx1
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv8.2-a+fp16+dotprod
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part d44
+end cpu cortex-x1
+
begin cpu neoverse-n1
cname neoversen1
alias !ares
@@ -1478,6 +1519,30 @@ begin cpu cortex-a76.cortex-a55
costs cortex_a57
end cpu cortex-a76.cortex-a55
+# Armv8.4 A-profile Architecture Processors
+begin cpu neoverse-v1
+ cname neoversev1
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv8.4-a+fp16+bf16+i8mm
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part 0xd40
+end cpu neoverse-v1
+
+# Armv8.5 A-profile Architecture Processors
+begin cpu neoverse-n2
+ cname neoversen2
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv8.5-a+fp16+bf16+i8mm
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part 0xd49
+end cpu neoverse-n2
+
# V8 M-profile implementations.
begin cpu cortex-m23
cname cortexm23
@@ -1508,6 +1573,10 @@ begin cpu cortex-m55
cname cortexm55
tune flags LDSCHED
architecture armv8.1-m.main+mve.fp+fp.dp
+ option nomve.fp remove mve_float
+ option nomve remove mve mve_float
+ option nofp remove ALL_FP mve_float
+ option nodsp remove MVE mve_float
isa quirk_no_asmcpu
costs v7m
vendor 41
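# Editorial note (illustrative, not part of the patch): the option entries
# above enable command lines such as
#   -mcpu=cortex-m55+nomve.fp   (keep integer MVE, drop the MVE FP extension)
#   -mcpu=cortex-m55+nofp       (drop all FP, including mve_float)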
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 0cc0ae7..703d616 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -120,7 +120,6 @@ extern int arm_coproc_mem_operand_no_writeback (rtx);
extern int arm_coproc_mem_operand_wb (rtx, int);
extern int neon_vector_mem_operand (rtx, int, bool);
extern int mve_vector_mem_operand (machine_mode, rtx, bool);
-bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);
extern int neon_struct_mem_operand (rtx);
extern rtx *neon_vcmla_lane_prepare_operands (rtx *);
@@ -373,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
#endif /* RTX_CODE */
extern bool arm_gen_setmem (rtx *);
+extern void arm_expand_vcond (rtx *, machine_mode);
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index ce35661..05f5c08 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -241,6 +241,15 @@ EnumValue
Enum(processor_type) String(cortex-a77) Value( TARGET_CPU_cortexa77)
EnumValue
+Enum(processor_type) String(cortex-a78) Value( TARGET_CPU_cortexa78)
+
+EnumValue
+Enum(processor_type) String(cortex-a78ae) Value( TARGET_CPU_cortexa78ae)
+
+EnumValue
+Enum(processor_type) String(cortex-x1) Value( TARGET_CPU_cortexx1)
+
+EnumValue
Enum(processor_type) String(neoverse-n1) Value( TARGET_CPU_neoversen1)
EnumValue
@@ -250,6 +259,12 @@ EnumValue
Enum(processor_type) String(cortex-a76.cortex-a55) Value( TARGET_CPU_cortexa76cortexa55)
EnumValue
+Enum(processor_type) String(neoverse-v1) Value( TARGET_CPU_neoversev1)
+
+EnumValue
+Enum(processor_type) String(neoverse-n2) Value( TARGET_CPU_neoversen2)
+
+EnumValue
Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23)
EnumValue
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 8ea9435..32657da 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -45,7 +45,9 @@
cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,
cortexa73cortexa53,cortexa55,cortexa75,
cortexa76,cortexa76ae,cortexa77,
+ cortexa78,cortexa78ae,cortexx1,
neoversen1,cortexa75cortexa55,cortexa76cortexa55,
- cortexm23,cortexm33,cortexm35p,
- cortexm55,cortexr52"
+ neoversev1,neoversen2,cortexm23,
+ cortexm33,cortexm35p,cortexm55,
+ cortexr52"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 022ef6c..dfadaca 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3391,6 +3391,20 @@ arm_configure_build_target (struct arm_build_target *target,
bitmap_ior (target->isa, target->isa, fpu_bits);
}
+ /* There may be implied bits which we still need to enable. These are
+ non-named features which are needed to complete other sets of features,
+ but cannot be enabled from arm-cpus.in due to being shared between
+ multiple fgroups. Each entry in all_implied_fbits is of the form
+ ante -> cons, meaning that if the feature "ante" is enabled, we should
+ implicitly enable "cons". */
+ const struct fbit_implication *impl = all_implied_fbits;
+ while (impl->ante)
+ {
+ if (bitmap_bit_p (target->isa, impl->ante))
+ bitmap_set_bit (target->isa, impl->cons);
+ impl++;
+ }
+
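/* Editorial sketch, assuming parsecpu.awk generates the table roughly as
   follows (names illustrative; isa_nobit is assumed to be zero, which is
   what terminates the loop above):

     static const struct fbit_implication all_implied_fbits[] = {
       { isa_bit_mve, isa_bit_vfp_base },
       { isa_bit_vfpv2, isa_bit_vfp_base },
       ...
       { isa_nobit, isa_nobit }
     };
*/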
if (!arm_selected_tune)
arm_selected_tune = arm_selected_cpu;
else /* Validate the features passed to -mtune. */
@@ -3415,8 +3429,9 @@ arm_option_override (void)
{
static const enum isa_feature fpu_bitlist_internal[]
= { ISA_ALL_FPU_INTERNAL, isa_nobit };
+ /* isa_bit_mve_float is also part of the FP bit list for armv8.1-m.main. */
static const enum isa_feature fp_bitlist[]
- = { ISA_ALL_FP, isa_nobit };
+ = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
cl_target_option opts;
@@ -13277,14 +13292,18 @@ arm_coproc_mem_operand_wb (rtx op, int wb_level)
/* Match:
(plus (reg)
- (const)). */
+ (const))
+
+ The encoded immediate for 16-bit modes is multiplied by 2,
+ while the encoded immediate for 32-bit and 64-bit modes is
+ multiplied by 4. */
+ int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
if (GET_CODE (ind) == PLUS
&& REG_P (XEXP (ind, 0))
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
&& CONST_INT_P (XEXP (ind, 1))
- && INTVAL (XEXP (ind, 1)) > -1024
- && INTVAL (XEXP (ind, 1)) < 1024
- && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
+ && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
return TRUE;
return FALSE;
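/* Worked example for the factor logic above (illustrative): for HImode
   (size 2) the factor is MIN (2, 4) = 2, so valid offsets are multiples of
   2 in [-510, 510]; for SImode and DImode the factor is 4, giving multiples
   of 4 in [-1020, 1020].  This replaces the old fixed (-1024, 1024) range,
   which assumed 32-bit scaling for every mode.  */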
@@ -28946,6 +28965,30 @@ arm_preferred_simd_mode (scalar_mode mode)
default:;
}
+ if (TARGET_HAVE_MVE)
+ switch (mode)
+ {
+ case E_QImode:
+ return V16QImode;
+ case E_HImode:
+ return V8HImode;
+ case E_SImode:
+ return V4SImode;
+
+ default:;
+ }
+
+ if (TARGET_HAVE_MVE_FLOAT)
+ switch (mode)
+ {
+ case E_HFmode:
+ return V8HFmode;
+ case E_SFmode:
+ return V4SFmode;
+
+ default:;
+ }
+
return word_mode;
}
@@ -30630,6 +30673,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
arm_post_atomic_barrier (model);
}
+/* Expand code to compare vectors OP0 and OP1 using condition CODE.
+ If CAN_INVERT, store either the result or its inverse in TARGET
+ and return true if TARGET contains the inverse. If !CAN_INVERT,
+ always store the result in TARGET, never its inverse.
+
+ Note that the handling of floating-point comparisons is not
+ IEEE compliant. */
+
+bool
+arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
+ bool can_invert)
+{
+ machine_mode cmp_result_mode = GET_MODE (target);
+ machine_mode cmp_mode = GET_MODE (op0);
+
+ bool inverted;
+ switch (code)
+ {
+ /* For these we need to compute the inverse of the requested
+ comparison. */
+ case UNORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case NE:
+ code = reverse_condition_maybe_unordered (code);
+ if (!can_invert)
+ {
+ /* Recursively emit the inverted comparison into a temporary
+ and then store its inverse in TARGET. This avoids reusing
+ TARGET (which for integer NE could be one of the inputs). */
+ rtx tmp = gen_reg_rtx (cmp_result_mode);
+ if (arm_expand_vector_compare (tmp, code, op0, op1, true))
+ gcc_unreachable ();
+ emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
+ return false;
+ }
+ inverted = true;
+ break;
+
+ default:
+ inverted = false;
+ break;
+ }
+
+ switch (code)
+ {
+ /* These are natively supported for zero comparisons, but otherwise
+ require the operands to be swapped. */
+ case LE:
+ case LT:
+ if (op1 != CONST0_RTX (cmp_mode))
+ {
+ code = swap_condition (code);
+ std::swap (op0, op1);
+ }
+ /* Fall through. */
+
+ /* These are natively supported for both register and zero operands. */
+ case EQ:
+ case GE:
+ case GT:
+ emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
+ return inverted;
+
+ /* These are natively supported for register operands only.
+ Comparisons with zero aren't useful and should be folded
+ or canonicalized by target-independent code. */
+ case GEU:
+ case GTU:
+ emit_insn (gen_neon_vc (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
+ return inverted;
+
+ /* These require the operands to be swapped and likewise do not
+ support comparisons with zero. */
+ case LEU:
+ case LTU:
+ emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
+ target, force_reg (cmp_mode, op1), op0));
+ return inverted;
+
+ /* These need a combination of two comparisons. */
+ case LTGT:
+ case ORDERED:
+ {
+ /* Operands are LTGT iff (a > b || a < b).
+ Operands are ORDERED iff (a > b || a <= b). */
+ rtx gt_res = gen_reg_rtx (cmp_result_mode);
+ rtx alt_res = gen_reg_rtx (cmp_result_mode);
+ rtx_code alt_code = (code == LTGT ? LT : LE);
+ if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+ || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
+ gcc_unreachable ();
+ emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
+ gt_res, alt_res)));
+ return inverted;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand a vcond or vcondu pattern with operands OPERANDS.
+ CMP_RESULT_MODE is the mode of the comparison result. */
+
+void
+arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
+{
+ rtx mask = gen_reg_rtx (cmp_result_mode);
+ bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
+ operands[4], operands[5], true);
+ if (inverted)
+ std::swap (operands[1], operands[2]);
+ emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
+ mask, operands[1], operands[2]));
+}
+
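/* Editorial walkthrough (illustrative): for a vcond computing
   (a != b ? x : y), arm_expand_vector_compare has no native NE, so it
   emits mask = (a == b) and returns true ("inverted"); arm_expand_vcond
   then swaps x and y, and the final vbsl selects y where a == b and x
   elsewhere -- exactly the requested NE semantics.  */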
#define MAX_VECT_LEN 16
struct expand_vec_perm_d
@@ -33112,9 +33276,7 @@ arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
= smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
- libval_mode,
- op0, GET_MODE (op0),
- op1, GET_MODE (op1));
+ libval_mode, op0, mode, op1, mode);
rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
@@ -33578,17 +33740,4 @@ arm_mode_base_reg_class (machine_mode mode)
struct gcc_target targetm = TARGET_INITIALIZER;
-bool
-arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1)
-{
- if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))
- return true;
- else if (mode == E_BFmode)
- return false;
- else if ((s_register_operand (op0, mode) && MEM_P (op1))
- || (s_register_operand (op1, mode) && MEM_P (op0)))
- return false;
- return true;
-}
-
#include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index f4d3676..4a63d33 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[];
#define VALID_MVE_STRUCT_MODE(MODE) \
((MODE) == TImode || (MODE) == OImode || (MODE) == XImode)
+/* The conditions under which vector modes are supported for general
+ arithmetic using Neon. */
+
+#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON
+
+#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON
+#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON
+
+/* HF operations have their own flush-to-zero control (FPSCR.FZ16). */
+#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST
+#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST
+
+/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can
+ only use them for general arithmetic when -funsafe-math-optimizations
+ is in effect. */
+#define ARM_HAVE_NEON_V2SF_ARITH \
+ (TARGET_NEON && flag_unsafe_math_optimizations)
+#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH
+
+/* The conditions under which vector modes are supported for general
+ arithmetic by any vector extension. */
+
+#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT)
+
+#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE)
+#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH
+
+#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH
+#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH
+
+#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
+#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
+
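/* Editorial sketch (illustrative): these macros are meant to be used as
   pattern conditions in the machine description (e.g. in vec-common.md),
   so a single expander can serve Neon, MVE and iWMMXt.  In C terms the
   availability test for, say, V4SImode general arithmetic is simply:

     if (ARM_HAVE_V4SI_ARITH)
       ...   // V4SImode arithmetic is available on this target
*/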
/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
extern int arm_regs_in_sequence[];
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index bffdb0b..1a8e498 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -7289,7 +7289,9 @@
(define_insn "*arm32_mov<mode>"
[(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r")
(match_operand:HFBF 1 "general_operand" " m,r,r,F"))]
- "TARGET_32BIT && !TARGET_HARD_FLOAT
+ "TARGET_32BIT
+ && !TARGET_HARD_FLOAT
+ && !TARGET_HAVE_MVE
&& ( s_register_operand (operands[0], <MODE>mode)
|| s_register_operand (operands[1], <MODE>mode))"
"*
@@ -7355,7 +7357,7 @@
if (arm_disable_literal_pool
&& (REG_P (operands[0]) || SUBREG_P (operands[0]))
&& CONST_DOUBLE_P (operands[1])
- && TARGET_HARD_FLOAT
+ && TARGET_VFP_BASE
&& !vfp3_const_double_rtx (operands[1]))
{
rtx clobreg = gen_reg_rtx (SFmode);
@@ -7452,7 +7454,7 @@
if (arm_disable_literal_pool
&& (REG_P (operands[0]) || SUBREG_P (operands[0]))
&& CONSTANT_P (operands[1])
- && TARGET_HARD_FLOAT
+ && TARGET_VFP_BASE
&& !arm_const_double_rtx (operands[1])
&& !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1])))
{
@@ -9212,7 +9214,7 @@
operands[2] = operands[1];
else
{
- rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ rtx mem = force_const_mem (SImode, operands[1]);
emit_move_insn (operands[2], mem);
}
}
@@ -9295,7 +9297,7 @@
operands[3] = operands[1];
else
{
- rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ rtx mem = force_const_mem (SImode, operands[1]);
emit_move_insn (operands[3], mem);
}
}
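/* Editorial note on the two hunks above (illustrative): force_const_mem
   returns a MEM referencing the pooled constant; the old code took
   XEXP (..., 0), i.e. the pool entry's *address*, so the move loaded the
   address rather than the constant.  Moving the MEM itself loads the
   constant's value, as intended.  */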
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index a801705..6c0d1e2 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -141,6 +141,7 @@
#define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a, __p)
#define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c)
#define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c)
+#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c)
#define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c)
#define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
#define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
@@ -260,6 +261,7 @@
#define vorrq_m(__inactive, __a, __b, __p) __arm_vorrq_m(__inactive, __a, __b, __p)
#define vqaddq_m(__inactive, __a, __b, __p) __arm_vqaddq_m(__inactive, __a, __b, __p)
#define vqdmladhq_m(__inactive, __a, __b, __p) __arm_vqdmladhq_m(__inactive, __a, __b, __p)
+#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, __c, __p)
#define vqdmladhxq_m(__inactive, __a, __b, __p) __arm_vqdmladhxq_m(__inactive, __a, __b, __p)
#define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c, __p)
#define vqdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m(__inactive, __a, __b, __p)
@@ -643,6 +645,7 @@
#define vcvtpq_u16_f16(__a) __arm_vcvtpq_u16_f16(__a)
#define vcvtpq_u32_f32(__a) __arm_vcvtpq_u32_f32(__a)
#define vcvtnq_u16_f16(__a) __arm_vcvtnq_u16_f16(__a)
+#define vcvtnq_u32_f32(__a) __arm_vcvtnq_u32_f32(__a)
#define vcvtmq_u16_f16(__a) __arm_vcvtmq_u16_f16(__a)
#define vcvtmq_u32_f32(__a) __arm_vcvtmq_u32_f32(__a)
#define vcvtaq_u16_f16(__a) __arm_vcvtaq_u16_f16(__a)
@@ -1234,9 +1237,6 @@
#define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
#define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
#define vrev64q_m_u8(__inactive, __a, __p) __arm_vrev64q_m_u8(__inactive, __a, __p)
-#define vqrdmlashq_n_u8(__a, __b, __c) __arm_vqrdmlashq_n_u8(__a, __b, __c)
-#define vqrdmlahq_n_u8(__a, __b, __c) __arm_vqrdmlahq_n_u8(__a, __b, __c)
-#define vqdmlahq_n_u8(__a, __b, __c) __arm_vqdmlahq_n_u8(__a, __b, __c)
#define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
#define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
#define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
@@ -1306,6 +1306,7 @@
#define vqdmlsdhxq_s8(__inactive, __a, __b) __arm_vqdmlsdhxq_s8(__inactive, __a, __b)
#define vqdmlsdhq_s8(__inactive, __a, __b) __arm_vqdmlsdhq_s8(__inactive, __a, __b)
#define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c)
+#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c)
#define vqdmladhxq_s8(__inactive, __a, __b) __arm_vqdmladhxq_s8(__inactive, __a, __b)
#define vqdmladhq_s8(__inactive, __a, __b) __arm_vqdmladhq_s8(__inactive, __a, __b)
#define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c)
@@ -1319,9 +1320,6 @@
#define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
#define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
#define vrev64q_m_u16(__inactive, __a, __p) __arm_vrev64q_m_u16(__inactive, __a, __p)
-#define vqrdmlashq_n_u16(__a, __b, __c) __arm_vqrdmlashq_n_u16(__a, __b, __c)
-#define vqrdmlahq_n_u16(__a, __b, __c) __arm_vqrdmlahq_n_u16(__a, __b, __c)
-#define vqdmlahq_n_u16(__a, __b, __c) __arm_vqdmlahq_n_u16(__a, __b, __c)
#define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
#define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
#define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
@@ -1390,6 +1388,7 @@
#define vqrdmladhq_s16(__inactive, __a, __b) __arm_vqrdmladhq_s16(__inactive, __a, __b)
#define vqdmlsdhxq_s16(__inactive, __a, __b) __arm_vqdmlsdhxq_s16(__inactive, __a, __b)
#define vqdmlsdhq_s16(__inactive, __a, __b) __arm_vqdmlsdhq_s16(__inactive, __a, __b)
+#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, __b, __c)
#define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c)
#define vqdmladhxq_s16(__inactive, __a, __b) __arm_vqdmladhxq_s16(__inactive, __a, __b)
#define vqdmladhq_s16(__inactive, __a, __b) __arm_vqdmladhq_s16(__inactive, __a, __b)
@@ -1404,9 +1403,6 @@
#define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
#define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
#define vrev64q_m_u32(__inactive, __a, __p) __arm_vrev64q_m_u32(__inactive, __a, __p)
-#define vqrdmlashq_n_u32(__a, __b, __c) __arm_vqrdmlashq_n_u32(__a, __b, __c)
-#define vqrdmlahq_n_u32(__a, __b, __c) __arm_vqrdmlahq_n_u32(__a, __b, __c)
-#define vqdmlahq_n_u32(__a, __b, __c) __arm_vqdmlahq_n_u32(__a, __b, __c)
#define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
#define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
#define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
@@ -1475,6 +1471,7 @@
#define vqrdmladhq_s32(__inactive, __a, __b) __arm_vqrdmladhq_s32(__inactive, __a, __b)
#define vqdmlsdhxq_s32(__inactive, __a, __b) __arm_vqdmlsdhxq_s32(__inactive, __a, __b)
#define vqdmlsdhq_s32(__inactive, __a, __b) __arm_vqdmlsdhq_s32(__inactive, __a, __b)
+#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, __b, __c)
#define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c)
#define vqdmladhxq_s32(__inactive, __a, __b) __arm_vqdmladhxq_s32(__inactive, __a, __b)
#define vqdmladhq_s32(__inactive, __a, __b) __arm_vqdmladhq_s32(__inactive, __a, __b)
@@ -1901,6 +1898,9 @@
#define vqdmladhxq_m_s8(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p)
#define vqdmladhxq_m_s32(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p)
#define vqdmladhxq_m_s16(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p)
+#define vqdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p)
+#define vqdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p)
+#define vqdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p)
#define vqdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p)
#define vqdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p)
#define vqdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p)
@@ -2024,8 +2024,6 @@
#define vmlaldavaq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaq_p_u16(__a, __b, __c, __p)
#define vmlaldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s32(__a, __b, __c, __p)
#define vmlaldavaxq_p_s16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s16(__a, __b, __c, __p)
-#define vmlaldavaxq_p_u32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u32(__a, __b, __c, __p)
-#define vmlaldavaxq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u16(__a, __b, __c, __p)
#define vmlsldavaq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaq_p_s32(__a, __b, __c, __p)
#define vmlsldavaq_p_s16(__a, __b, __c, __p) __arm_vmlsldavaq_p_s16(__a, __b, __c, __p)
#define vmlsldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaxq_p_s32(__a, __b, __c, __p)
@@ -6961,27 +6959,6 @@ __arm_vrev64q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __builtin_mve_vqrdmlashq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __builtin_mve_vqrdmlahq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __builtin_mve_vqdmlahq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
{
return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p);
@@ -7424,6 +7401,13 @@ __arm_vqrdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
+{
+ return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
{
return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c);
@@ -7557,27 +7541,6 @@ __arm_vrev64q_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __builtin_mve_vqrdmlashq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __builtin_mve_vqrdmlahq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __builtin_mve_vqdmlahq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
{
return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p);
@@ -8019,6 +7982,13 @@ __arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+ return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
{
return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
@@ -8152,27 +8122,6 @@ __arm_vrev64q_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __builtin_mve_vqrdmlashq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __builtin_mve_vqrdmlahq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __builtin_mve_vqdmlahq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
{
return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p);
@@ -8614,6 +8563,13 @@ __arm_vqrdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+ return __builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
{
return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c);
@@ -11141,6 +11097,27 @@ __arm_vqrdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
{
return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p);
@@ -11811,20 +11788,6 @@ __arm_vmlaldavaxq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t
return __builtin_mve_vmlaldavaxq_p_sv8hi (__a, __b, __c, __p);
}
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __builtin_mve_vmlaldavaxq_p_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __builtin_mve_vmlaldavaxq_p_uv8hi (__a, __b, __c, __p);
-}
-
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmlsldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -13993,64 +13956,56 @@ __extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value)
{
- __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
- __builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr);
+ *__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
{
- __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
- __builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr);
+ *__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
{
- __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
- __builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p);
+ *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
{
- __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
- __builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p);
+ *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
{
- __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
- __builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
{
- __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
- __builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value)
{
- __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
- __builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
{
- __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
- __builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
}
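/* Editorial note (illustrative): the writeback builtins now *return* the
   post-incremented base vector, and the intrinsic stores it back through
   __addr itself, replacing the old two-builtin sequence (one store plus a
   separate "add" builtin).  Usage is unchanged:

     uint32x4_t base = ...;
     vstrwq_scatter_base_wb_s32 (&base, 4, values);
     // base now holds the updated per-lane addresses
*/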
__extension__ extern __inline uint8x16_t
@@ -17012,6 +16967,13 @@ __arm_vcvtnq_u16_f16 (float16x8_t __a)
return __builtin_mve_vcvtnq_uv8hi (__a);
}
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtnq_u32_f32 (float32x4_t __a)
+{
+ return __builtin_mve_vcvtnq_uv4si (__a);
+}
+
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcvtmq_u16_f16 (float16x8_t __a)
@@ -19158,16 +19120,14 @@ __extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value)
{
- __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
- __builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
}
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
{
- __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
- __builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p);
+ *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
}
__extension__ extern __inline float16x8_t
@@ -23742,27 +23702,6 @@ __arm_vrev64q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqrdmlashq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqrdmlahq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqdmlahq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
{
return __arm_vmvnq_m_u8 (__inactive, __a, __p);
@@ -24204,6 +24143,13 @@ __arm_vqrdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
+{
+ return __arm_vqdmlashq_n_s8 (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
{
return __arm_vqrdmlahq_n_s8 (__a, __b, __c);
@@ -24337,27 +24283,6 @@ __arm_vrev64q_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqrdmlashq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqrdmlahq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqdmlahq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
{
return __arm_vmvnq_m_u16 (__inactive, __a, __p);
@@ -24799,6 +24724,13 @@ __arm_vqrdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+ return __arm_vqdmlashq_n_s16 (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
{
return __arm_vqrdmlahq_n_s16 (__a, __b, __c);
@@ -24932,27 +24864,6 @@ __arm_vrev64q_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqrdmlashq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqrdmlahq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqdmlahq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
{
return __arm_vmvnq_m_u32 (__inactive, __a, __p);
@@ -25394,6 +25305,13 @@ __arm_vqrdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+ return __arm_vqdmlashq_n_s32 (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
{
return __arm_vqrdmlahq_n_s32 (__a, __b, __c);
@@ -27921,6 +27839,27 @@ __arm_vqrdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
{
return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p);
@@ -28591,20 +28530,6 @@ __arm_vmlaldavaxq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p
return __arm_vmlaldavaxq_p_s16 (__a, __b, __c, __p);
}
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_u16 (__a, __b, __c, __p);
-}
-
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vmlsldavaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -35651,6 +35576,7 @@ enum {
short: __ARM_mve_type_int_n, \
int: __ARM_mve_type_int_n, \
long: __ARM_mve_type_int_n, \
+ double: __ARM_mve_type_fp_n, \
long long: __ARM_mve_type_int_n, \
unsigned char: __ARM_mve_type_int_n, \
unsigned short: __ARM_mve_type_int_n, \
@@ -35723,6 +35649,8 @@ extern void *__ARM_undef;
_Generic(param, type: param, default: *(type *)__ARM_undef)
#define __ARM_mve_coerce1(param, type) \
_Generic(param, type: param, const type: param, default: *(type *)__ARM_undef)
+#define __ARM_mve_coerce2(param, type) \
+ _Generic(param, type: param, float16_t: param, float32_t: param, default: *(type *)__ARM_undef)
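/* Editorial sketch (illustrative): together with the new "double" entry in
   the type map above, __ARM_mve_coerce2 lets an unsuffixed FP literal reach
   the _n overloads, e.g.

     float16x8_t r = vaddq (v, 1.5);   // 1.5 has type double -> fp_n case

   Previously an unsuffixed double literal had no entry in the type map and
   was rejected.  */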
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
@@ -35939,14 +35867,14 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_f16 (__ARM_mve_coerce(p0, float16x8_t), __ARM_mve_coerce(p1, float16x8_t)), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_f32 (__ARM_mve_coerce(p0, float32x4_t), __ARM_mve_coerce(p1, float32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));})
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));})
#define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -35997,8 +35925,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
@@ -36029,8 +35957,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
@@ -36069,8 +35997,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));})
#define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36083,8 +36011,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));})
#define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36097,8 +36025,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));})
#define __arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36111,8 +36039,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));})
#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36123,8 +36051,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
@@ -36179,8 +36107,8 @@ extern void *__ARM_undef;
#define __arm_vmaxnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
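
For reductions whose scalar operand comes first, __ARM_mve_coerce2 is applied to __p0 instead, so either a typed float16_t/float32_t accumulator or a double constant is accepted. A sketch with hypothetical helper names:

  float16_t maxav (float16_t acc, float16x8_t v) { return vmaxnmavq (acc, v); }  /* _f16 */
  float32_t maxav0 (float32x4_t v)               { return vmaxnmavq (1.0, v); }  /* _f32 */
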
#define __arm_vmaxnmq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36191,14 +36119,14 @@ extern void *__ARM_undef;
#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
#define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
#define __arm_vminnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36209,8 +36137,8 @@ extern void *__ARM_undef;
#define __arm_vminnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
#define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
@@ -36232,8 +36160,8 @@ extern void *__ARM_undef;
#define __arm_vsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
@@ -36252,8 +36180,8 @@ extern void *__ARM_undef;
#define __arm_vminnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
@@ -36782,10 +36710,15 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
+
+#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
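
The polymorphic vqdmlashq is new and, like the vqrdmlashq entry above once its unsigned arms are dropped, maps only to signed forms. Its scalar still goes through the exact-match __ARM_mve_coerce, so the third argument must already have the matching intN_t type; a plain int literal would fall through to __ARM_undef. Sketch (hypothetical function name):

  int16x8_t qdmlash (int16x8_t m1, int16x8_t m2, int16_t add)
  {
    return vqdmlashq (m1, m2, add);   /* selects __arm_vqdmlashq_n_s16.  */
  }
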
#define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36793,10 +36726,7 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
#define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -36815,10 +36745,7 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
#define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37011,8 +36938,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
@@ -37027,8 +36954,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));})
#define __arm_vcmpltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37041,8 +36968,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));})
#define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37061,8 +36988,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t), p2), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t), p2), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));})
#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37116,8 +37043,8 @@ extern void *__ARM_undef;
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
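
The ternary contractions apply __ARM_mve_coerce2 to their third operand, so vfmaq also accepts a double constant as the scalar multiplier. Sketch:

  float16x8_t fma_n (float16x8_t a, float16x8_t b)
  {
    return vfmaq (a, b, 2.0);   /* __arm_vfmaq_n_f16, scalar via __ARM_mve_coerce2.  */
  }
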
@@ -37132,8 +37059,8 @@ extern void *__ARM_undef;
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)));})
#define __arm_vmaxnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37156,14 +37083,14 @@ extern void *__ARM_undef;
#define __arm_vmaxnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));})
#define __arm_vmaxnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));})
#define __arm_vminnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37174,14 +37101,14 @@ extern void *__ARM_undef;
#define __arm_vminnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));})
#define __arm_vminnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));})
#define __arm_vrndnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37248,8 +37175,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));})
#define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37353,8 +37280,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
@@ -37389,8 +37316,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vandq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37531,15 +37458,15 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37580,8 +37507,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -37614,8 +37541,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vorrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -38113,8 +38040,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vandq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -38248,8 +38175,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vnegq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
@@ -38337,8 +38264,8 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));})
#define __arm_vcmulq_rot90_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -38370,8 +38297,8 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint64x2_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \
+ int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));})
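
vsetq_lane gets the same treatment on its scalar operand; the lane index p2 is forwarded untouched, since it must remain an integer constant expression. Sketch:

  float32x4_t set_lane0 (float32x4_t v)
  {
    return vsetq_lane (1.25, v, 0);   /* 1.25 is double: __arm_vsetq_lane_f32.  */
  }
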
#else /* MVE Integer. */
@@ -38895,12 +38822,12 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));})
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)));})
#define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -39254,10 +39181,15 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
+
+#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
#define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -39265,10 +39197,7 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
#define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -39399,10 +39328,7 @@ extern void *__ARM_undef;
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
#define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -40800,6 +40726,14 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+
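The new __arm_vqdmlashq_m wrapper above dispatches on the argument types at
compile time, like the rest of arm_mve.h.  A minimal usage sketch, assuming an
MVE-enabled toolchain (e.g. -march=armv8.1-m.main+mve) and the default short
intrinsic names; the function name is hypothetical:

#include <arm_mve.h>

/* Predicated saturating doubling multiply-accumulate of a scalar:
   lanes disabled in the predicate keep their previous value.  */
int16x8_t
qdmlash_masked (int16x8_t acc, int16x8_t v, int16_t s, mve_pred16_t p)
{
  /* _Generic resolves this call to __arm_vqdmlashq_m_n_s16.  */
  return vqdmlashq_m (acc, v, s, p);
}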
#define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -41057,9 +40991,7 @@ extern void *__ARM_undef;
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_p_s16 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavaxq_p_u16 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavaxq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
#define __arm_vmlsldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -41679,16 +41611,16 @@ extern void *__ARM_undef;
#define __arm_vmaxavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));})
#define __arm_vmaxavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -41703,36 +41635,36 @@ extern void *__ARM_undef;
#define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vmaxvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));})
#define __arm_vminavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));})
#define __arm_vminavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
@@ -41747,22 +41679,22 @@ extern void *__ARM_undef;
#define __arm_vminvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t)));})
#define __arm_vminvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));})
#define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
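The __ARM_mve_coerce removals in the vmaxvq/vminvq families above mean the
scalar operand is now passed through unchanged and converted by the ordinary
C rules; dispatch still keys on __ARM_mve_type_int_n.  A hedged illustration
(hypothetical function names, MVE toolchain assumed):

#include <arm_mve.h>
#include <stdint.h>

/* Across-vector maximum: any integer expression works as the seed,
   since it is no longer coerced to an exact-width type first.  */
int8_t
max_of (int8x16_t v)
{
  return vmaxvq (INT8_MIN, v);          /* selects __arm_vmaxvq_s8 */
}

/* Predicated form: only lanes enabled by p participate.  */
uint16_t
min_of (uint16x8_t v, mve_pred16_t p)
{
  return vminvq_p (UINT16_MAX, v, p);   /* selects __arm_vminvq_p_u16 */
}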
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 753e40a..f38926f 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -312,9 +312,6 @@ VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlashq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlahq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqdmlahq_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaq_n_u, v16qi, v8hi, v4si)
@@ -384,6 +381,7 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmladhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhxq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlahq_n_s, v16qi, v8hi, v4si)
+VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlashq_n_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhxq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmlsdavaxq_s, v16qi, v8hi, v4si)
@@ -574,6 +572,7 @@ VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s, v16qi, v8hi, v4si)
+VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi, v8hi, v4si)
@@ -615,7 +614,6 @@ VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshlq_m_n_s, v16qi, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulltq_poly_m_p, v16qi, v8hi)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmullbq_poly_m_p, v16qi, v8hi)
-VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaxq_p_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaq_p_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrntq_m_n_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrnbq_m_n_u, v8hi, v4si)
@@ -828,19 +826,9 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf)
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di)
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf)
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di)
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)
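Each VARn entry in this file registers the named builtin once per listed
vector mode, so the vqdmlashq_n_s line added above yields one builtin each
for v16qi, v8hi and v4si, and the arm_mve.h wrappers select among them.
A sketch of the user-visible effect; the builtin name in the comment follows
the usual mve naming convention and is an assumption:

#include <arm_mve.h>

int32x4_t
qdmlash_s32 (int32x4_t acc, int32x4_t v, int32_t s)
{
  /* Resolves to __arm_vqdmlashq_n_s32, wrapping the builtin registered
     by the VAR3 entry, presumably __builtin_mve_vqdmlashq_n_sv4si.  */
  return vqdmlashq (acc, v, s);
}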
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index ff229aa..789e333 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -454,10 +454,13 @@
(define_memory_constraint "Uj"
"@internal
- In ARM/Thumb-2 state a VFP load/store address which does not support
- writeback at all (e.g. vldr.16)."
+ In ARM/Thumb-2 state a VFP load/store address that supports writeback
+ for Neon but not for MVE"
(and (match_code "mem")
- (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+ (match_test "TARGET_32BIT")
+ (match_test "TARGET_HAVE_MVE
+ ? arm_coproc_mem_operand_no_writeback (op)
+ : neon_vector_mem_operand (op, 2, true)")))
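Read as plain C, the reworked Uj test amounts to the predicate below.  This
is a restatement for clarity, not code from the patch; the role of
neon_vector_mem_operand's arguments is inferred from its use here:

/* Uj: with MVE, reject any writeback-capable address; with Neon,
   accept what neon_vector_mem_operand (op, 2, true) allows, which
   includes writeback forms.  */
static bool
uj_constraint_ok (rtx op)
{
  if (!MEM_P (op) || !TARGET_32BIT)
    return false;
  return TARGET_HAVE_MVE
	 ? arm_coproc_mem_operand_no_writeback (op)
	 : neon_vector_mem_operand (op, 2, true);
}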
(define_memory_constraint "Uy"
"@internal
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0bc9eba..f934872 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -66,14 +66,6 @@
;; Integer and float modes supported by Neon and IWMMXT.
(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
-;; Integer and float modes supported by Neon, IWMMXT and MVE, used by
-;; arithmetic expand patterns.
-(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF])
-
-;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by
-;; arithmetic expand patterns.
-(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI])
-
;; Integer and float modes supported by Neon, IWMMXT and MVE.
(define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI])
@@ -267,6 +259,16 @@
(define_mode_iterator VBFCVT [V4BF V8BF])
(define_mode_iterator VBFCVTM [V2SI SF])
+;; MVE mode iterators.
+(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
+(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
+(define_mode_iterator MVE_0 [V8HF V4SF])
+(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI])
+(define_mode_iterator MVE_3 [V16QI V8HI])
+(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
+(define_mode_iterator MVE_5 [V8HI V4SI])
+(define_mode_iterator MVE_6 [V8HI V4SI])
+
;;----------------------------------------------------------------------------
;; Code iterators
;;----------------------------------------------------------------------------
@@ -901,6 +903,35 @@
(define_mode_attr cde_suffix [(SI "") (DI "d")])
(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])
+;; MVE mode attributes.
+(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI")
+ (V4SF "V4SI")])
+(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")])
+
+(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")])
+(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")])
+(define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf")
+ (V8HF "Rd") (V4SF "Rf")])
+(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")])
+
+(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15")
+ (V4SI "mve_imm_31")])
+(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")])
+(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16")
+ (V4SI "mve_imm_32")
+ (V8HF "mve_imm_16") (V4SF "mve_imm_32")])
+(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")])
+
+(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")])
+(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")])
+
+(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h")
+ (V4SF "w")])
+(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
+ (V8HF "u16") (V4SF "32")])
+(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
+ (V8HF "=w") (V4SF "=&w")])
+
;;----------------------------------------------------------------------------
;; Code attributes
;;----------------------------------------------------------------------------
@@ -1181,6 +1212,188 @@
(define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8")
(UNSPEC_MATMUL_US "s8")])
+;; MVE int attributes.
+(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
+ (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
+ (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s")
+ (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u")
+ (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s")
+ (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u")
+ (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s")
+ (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
+ (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
+ (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
+ (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
+ (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
+ (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
+ (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
+ (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u")
+ (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s")
+ (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
+ (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
+ (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
+ (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
+ (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
+ (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
+ (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
+ (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
+ (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
+ (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
+ (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
+ (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u")
+ (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u")
+ (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u")
+ (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s")
+ (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u")
+ (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s")
+ (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u")
+ (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s")
+ (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s")
+ (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u")
+ (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s")
+ (VQRSHLQ_U "u") (VQSHLQ_N_S "s") (VQSHLQ_N_U "u")
+ (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s")
+ (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u")
+ (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s")
+ (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u")
+ (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s")
+ (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u")
+ (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s")
+ (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u")
+ (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s")
+ (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u")
+ (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u")
+ (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s")
+ (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u")
+ (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u")
+ (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s")
+ (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s")
+ (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u")
+ (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u")
+ (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s")
+ (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s")
+ (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u")
+ (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s")
+ (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u")
+ (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s")
+ (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u")
+ (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s")
+ (VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u")
+ (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s")
+ (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U "u")
+ (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s")
+ (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
+ (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
+ (VPSELQ_U "u") (VQDMLAHQ_N_S "s")
+ (VQDMLASHQ_N_S "s")
+ (VQRDMLAHQ_N_S "s")
+ (VQRDMLASHQ_N_S "s")
+ (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
+ (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s")
+ (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u")
+ (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s")
+ (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u")
+ (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s")
+ (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s")
+ (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s")
+ (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s")
+ (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s")
+ (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s")
+ (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u")
+ (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u")
+ (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u")
+ (VREV16Q_M_S "s") (VREV16Q_M_U "u")
+ (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u")
+ (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u")
+ (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u")
+ (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u")
+ (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s")
+ (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u")
+ (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s")
+ (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s")
+ (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u")
+ (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u")
+ (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u")
+ (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u")
+ (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s")
+ (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s")
+ (VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u")
+ (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u")
+ (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s")
+ (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u")
+ (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s")
+ (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s")
+ (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u")
+ (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u")
+ (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u")
+ (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u")
+ (VQDMLASHQ_M_N_S "s")
+ (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s")
+ (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u")
+ (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u")
+ (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s")
+ (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s")
+ (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u")
+ (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u")
+ (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u")
+ (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u")
+ (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u")
+ (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s")
+ (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u")
+ (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u")
+ (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u")
+ (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s")
+ (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s")
+ (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u")
+ (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s")
+ (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u")
+ (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u")
+ (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s")
+ (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
+ (VSHRNTQ_M_N_U "u") (VSHRNTQ_M_N_S "s")
+ (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u")
+ (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u")
+ (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u")
+ (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u")
+ (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s")
+ (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S "s")
+ (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
+ (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
+ (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
+ (VMLALDAVAXQ_P_S "s")
+ (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
+ (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
+ (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
+ (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s")
+ (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u")
+ (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s")
+ (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
+ (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s")
+ (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u")
+ (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s")
+ (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
+ (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s")
+ (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u")
+ (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s")
+ (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u")
+ (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s")
+ (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u")
+ (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u")
+ (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u")
+ (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s")
+ (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s")
+ (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u")
+ (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u")
+ (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s")
+ (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s")
+ (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s")
+ (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48")
+ (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s")
+ (VSHLCQ_M_U "u")])
+
+(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32")
+ (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16")
+ (VCTP32Q_M "32") (VCTP64Q_M "64")])
;; Both kinds of return insn.
(define_code_iterator RETURNS [return simple_return])
@@ -1256,3 +1469,249 @@
;; An iterator for CDE MVE accumulator/non-accumulator versions.
(define_int_attr a [(UNSPEC_VCDE "") (UNSPEC_VCDEA "a")])
+
+;; MVE int iterators.
+(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
+(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
+(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
+(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
+(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
+(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
+(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S])
+(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
+(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
+(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
+(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
+(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
+(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S])
+(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U])
+(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U])
+(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U])
+(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S])
+(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q])
+(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M])
+(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U])
+(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S])
+(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U])
+(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U])
+(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U])
+(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S])
+(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U])
+(define_int_iterator VABDQ [VABDQ_S VABDQ_U])
+(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
+(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
+(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
+(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
+(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
+(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
+(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
+(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
+(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
+(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
+(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
+(define_int_iterator VEORQ [VEORQ_U VEORQ_S])
+(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U])
+(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S])
+(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U])
+(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S])
+(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S])
+(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S])
+(define_int_iterator VMINQ [VMINQ_S VMINQ_U])
+(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S])
+(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S])
+(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U])
+(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S])
+(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S])
+(define_int_iterator VMULQ [VMULQ_U VMULQ_S])
+(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S])
+(define_int_iterator VORNQ [VORNQ_U VORNQ_S])
+(define_int_iterator VORRQ [VORRQ_S VORRQ_U])
+(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S])
+(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U])
+(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U])
+(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U])
+(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U])
+(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U])
+(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S])
+(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S])
+(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U])
+(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U])
+(define_int_iterator VRMULHQ [VRMULHQ_S VRMULHQ_U])
+(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U])
+(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S])
+(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U])
+(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S])
+(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U])
+(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U])
+(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U])
+(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U])
+(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U])
+(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S])
+(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S])
+(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S])
+(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U])
+(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S])
+(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S])
+(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S])
+(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U])
+(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S])
+(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S])
+(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U])
+(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U])
+(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U])
+(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S])
+(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U])
+(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U])
+(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U])
+(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U])
+(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U])
+(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U])
+(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U])
+(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U])
+(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U])
+(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U])
+(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U])
+(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U])
+(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U])
+(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U])
+(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U])
+(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U])
+(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
+(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
+(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S])
+(define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S])
+(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S])
+(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S])
+(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U])
+(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U])
+(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U])
+(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U])
+(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U])
+(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U])
+(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U])
+(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S])
+(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U])
+(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S])
+(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S])
+(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S])
+(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U])
+(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U])
+(define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U])
+(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S])
+(define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S])
+(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S])
+(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U])
+(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S])
+(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S])
+(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U])
+(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S])
+(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S])
+(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S])
+(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S])
+(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U])
+(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U])
+(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U])
+(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U])
+(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U])
+(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U])
+(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S])
+(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U])
+(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S])
+(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U])
+(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U])
+(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U])
+(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S])
+(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S])
+(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U])
+(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S])
+(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U])
+(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U])
+(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S])
+(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U])
+(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U])
+(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U])
+(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U])
+(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U])
+(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S])
+(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S])
+(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U])
+(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S])
+(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S])
+(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S])
+(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S])
+(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S])
+(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S])
+(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S])
+(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U])
+(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S])
+(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S])
+(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S])
+(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U])
+(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U])
+(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S])
+(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S])
+(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S])
+(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S])
+(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S])
+(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S])
+(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U])
+(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U])
+(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U])
+(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U])
+(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U])
+(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U])
+(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U])
+(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U])
+(define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S])
+(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_S])
+(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S])
+(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U])
+(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S])
+(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U])
+(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S])
+(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S])
+(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S])
+(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S])
+(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U])
+(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
+(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U])
+(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U])
+(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U])
+(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
+(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U])
+(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
+(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U])
+(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
+(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U])
+(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U])
+(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U])
+(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U])
+(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
+(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
+(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
+(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
+(define_int_iterator VST1Q [VST1Q_S VST1Q_U])
+(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U])
+(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U])
+(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U])
+(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U])
+(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
+(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U])
+(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U])
+(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U])
+(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U])
+(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U])
+(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
+(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
+(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
+(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S])
+(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S])
+(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S])
+(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S])
+(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S])
+(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S])
+(define_int_iterator VADCQ [VADCQ_U VADCQ_S])
+(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S])
+(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
+(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
+(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
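These int iterators pair with the supf attribute defined earlier so that a
single pattern can emit both the signed and unsigned form of an instruction.
The define_insn below is a schematic sketch modelled on the MVE patterns, not
a literal copy of one; the constraints are deliberately simplified:

;; VADDVQ expands to VADDVQ_S and VADDVQ_U; <supf> prints "s" or "u" and
;; <V_sz_elem> the element size, giving vaddv.s8 ... vaddv.u32.
(define_insn "mve_vaddvq_<supf><mode>"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")]
	 VADDVQ))]
  "TARGET_HAVE_MVE"
  "vaddv.<supf><V_sz_elem>\t%0, %q1"
)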
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 465b39a..ecbaaa9 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -17,654 +17,6 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
-(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
-(define_mode_iterator MVE_0 [V8HF V4SF])
-(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI])
-(define_mode_iterator MVE_3 [V16QI V8HI])
-(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
-(define_mode_iterator MVE_5 [V8HI V4SI])
-(define_mode_iterator MVE_6 [V8HI V4SI])
-
-(define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
- VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F VREV32Q_F
- VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S VQNEGQ_S
- VCVTQ_TO_F_U VREV16Q_S VREV16Q_U VADDLVQ_S VMVNQ_N_S
- VMVNQ_N_U VCVTAQ_S VCVTAQ_U VREV64Q_S VREV64Q_U
- VQABSQ_S VNEGQ_S VMVNQ_S VMVNQ_U VDUPQ_N_U VDUPQ_N_S
- VCLZQ_U VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U VABSQ_S
- VREV32Q_U VREV32Q_S VMOVLTQ_U VMOVLTQ_S VMOVLBQ_S
- VMOVLBQ_U VCVTQ_FROM_F_S VCVTQ_FROM_F_U VCVTPQ_S
- VCVTPQ_U VCVTNQ_S VCVTNQ_U VCVTMQ_S VCVTMQ_U
- VADDLVQ_U VCTP8Q VCTP16Q VCTP32Q VCTP64Q VPNOT
- VCREATEQ_F VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U VBRSRQ_N_F
- VSUBQ_N_F VCREATEQ_U VCREATEQ_S VSHRQ_N_S VSHRQ_N_U
- VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U VADDLVQ_P_S
- VADDLVQ_P_U VCMPNEQ_U VCMPNEQ_S VSHLQ_S VSHLQ_U VABDQ_S
- VADDQ_N_S VADDVAQ_S VADDVQ_P_S VANDQ_S VBICQ_S
- VBRSRQ_N_S VCADDQ_ROT270_S VCADDQ_ROT90_S VCMPEQQ_S
- VCMPEQQ_N_S VCMPNEQ_N_S VEORQ_S VHADDQ_S VHADDQ_N_S
- VHSUBQ_S VHSUBQ_N_S VMAXQ_S VMAXVQ_S VMINQ_S VMINVQ_S
- VMLADAVQ_S VMULHQ_S VMULLBQ_INT_S VMULLTQ_INT_S VMULQ_S
- VMULQ_N_S VORNQ_S VORRQ_S VQADDQ_S VQADDQ_N_S VQRSHLQ_S
- VQRSHLQ_N_S VQSHLQ_S VQSHLQ_N_S VQSHLQ_R_S VQSUBQ_S
- VQSUBQ_N_S VRHADDQ_S VRMULHQ_S VRSHLQ_S VRSHLQ_N_S
- VRSHRQ_N_S VSHLQ_N_S VSHLQ_R_S VSUBQ_S VSUBQ_N_S
- VABDQ_U VADDQ_N_U VADDVAQ_U VADDVQ_P_U VANDQ_U VBICQ_U
- VBRSRQ_N_U VCADDQ_ROT270_U VCADDQ_ROT90_U VCMPEQQ_U
- VCMPEQQ_N_U VCMPNEQ_N_U VEORQ_U VHADDQ_U VHADDQ_N_U
- VHSUBQ_U VHSUBQ_N_U VMAXQ_U VMAXVQ_U VMINQ_U VMINVQ_U
- VMLADAVQ_U VMULHQ_U VMULLBQ_INT_U VMULLTQ_INT_U VMULQ_U
- VMULQ_N_U VORNQ_U VORRQ_U VQADDQ_U VQADDQ_N_U VQRSHLQ_U
- VQRSHLQ_N_U VQSHLQ_U VQSHLQ_N_U VQSHLQ_R_U VQSUBQ_U
- VQSUBQ_N_U VRHADDQ_U VRMULHQ_U VRSHLQ_U VRSHLQ_N_U
- VRSHRQ_N_U VSHLQ_N_U VSHLQ_R_U VSUBQ_U VSUBQ_N_U
- VCMPGEQ_N_S VCMPGEQ_S VCMPGTQ_N_S VCMPGTQ_S VCMPLEQ_N_S
- VCMPLEQ_S VCMPLTQ_N_S VCMPLTQ_S VHCADDQ_ROT270_S
- VHCADDQ_ROT90_S VMAXAQ_S VMAXAVQ_S VMINAQ_S VMINAVQ_S
- VMLADAVXQ_S VMLSDAVQ_S VMLSDAVXQ_S VQDMULHQ_N_S
- VQDMULHQ_S VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S
- VCMPCSQ_N_U VCMPCSQ_U VCMPHIQ_N_U VCMPHIQ_U VABDQ_M_S
- VABDQ_M_U VABDQ_F VADDQ_N_F VANDQ_F VBICQ_F
- VCADDQ_ROT270_F VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F
- VCMPGEQ_F VCMPGEQ_N_F VCMPGTQ_F VCMPGTQ_N_F VCMPLEQ_F
- VCMPLEQ_N_F VCMPLTQ_F VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F
- VCMULQ_F VCMULQ_ROT180_F VCMULQ_ROT270_F VCMULQ_ROT90_F
- VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F VMAXNMVQ_F
- VMINNMAQ_F VMINNMAVQ_F VMINNMQ_F VMINNMVQ_F VMULQ_F
- VMULQ_N_F VORNQ_F VORRQ_F VSUBQ_F VADDLVAQ_U
- VADDLVAQ_S VBICQ_N_U VBICQ_N_S VCTP8Q_M VCTP16Q_M
- VCTP32Q_M VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32
- VMLALDAVQ_U VMLALDAVXQ_U VMLALDAVXQ_S VMLALDAVQ_S
- VMLSLDAVQ_S VMLSLDAVXQ_S VMOVNBQ_U VMOVNBQ_S
- VMOVNTQ_U VMOVNTQ_S VORRQ_N_S VORRQ_N_U VQDMULLBQ_N_S
- VQDMULLBQ_S VQDMULLTQ_N_S VQDMULLTQ_S VQMOVNBQ_U
- VQMOVNBQ_S VQMOVUNBQ_S VQMOVUNTQ_S VRMLALDAVHXQ_S
- VRMLSLDAVHQ_S VRMLSLDAVHXQ_S VSHLLBQ_S
- VSHLLBQ_U VSHLLTQ_U VSHLLTQ_S VQMOVNTQ_U VQMOVNTQ_S
- VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_U VSHLLTQ_N_S
- VRMLALDAVHQ_U VRMLALDAVHQ_S VMULLTQ_POLY_P
- VMULLBQ_POLY_P VBICQ_M_N_S VBICQ_M_N_U VCMPEQQ_M_F
- VCVTAQ_M_S VCVTAQ_M_U VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U
- VQRSHRNBQ_N_U VQRSHRNBQ_N_S VQRSHRUNBQ_N_S
- VRMLALDAVHAQ_S VABAVQ_S VABAVQ_U VSHLCQ_S VSHLCQ_U
- VRMLALDAVHAQ_U VABSQ_M_S VADDVAQ_P_S VADDVAQ_P_U
- VCLSQ_M_S VCLZQ_M_S VCLZQ_M_U VCMPCSQ_M_N_U
- VCMPCSQ_M_U VCMPEQQ_M_N_S VCMPEQQ_M_N_U VCMPEQQ_M_S
- VCMPEQQ_M_U VCMPGEQ_M_N_S VCMPGEQ_M_S VCMPGTQ_M_N_S
- VCMPGTQ_M_S VCMPHIQ_M_N_U VCMPHIQ_M_U VCMPLEQ_M_N_S
- VCMPLEQ_M_S VCMPLTQ_M_N_S VCMPLTQ_M_S VCMPNEQ_M_N_S
- VCMPNEQ_M_N_U VCMPNEQ_M_S VCMPNEQ_M_U VDUPQ_M_N_S
- VDUPQ_M_N_U VDWDUPQ_N_U VDWDUPQ_WB_U VIWDUPQ_N_U
- VIWDUPQ_WB_U VMAXAQ_M_S VMAXAVQ_P_S VMAXVQ_P_S
- VMAXVQ_P_U VMINAQ_M_S VMINAVQ_P_S VMINVQ_P_S VMINVQ_P_U
- VMLADAVAQ_S VMLADAVAQ_U VMLADAVQ_P_S VMLADAVQ_P_U
- VMLADAVXQ_P_S VMLAQ_N_S VMLAQ_N_U VMLASQ_N_S VMLASQ_N_U
- VMLSDAVQ_P_S VMLSDAVXQ_P_S VMVNQ_M_S VMVNQ_M_U
- VNEGQ_M_S VPSELQ_S VPSELQ_U VQABSQ_M_S VQDMLAHQ_N_S
- VQDMLAHQ_N_U VQNEGQ_M_S VQRDMLADHQ_S VQRDMLADHXQ_S
- VQRDMLAHQ_N_S VQRDMLAHQ_N_U VQRDMLASHQ_N_S
- VQRDMLASHQ_N_U VQRDMLSDHQ_S VQRDMLSDHXQ_S VQRSHLQ_M_N_S
- VQRSHLQ_M_N_U VQSHLQ_M_R_S VQSHLQ_M_R_U VREV64Q_M_S
- VREV64Q_M_U VRSHLQ_M_N_S VRSHLQ_M_N_U VSHLQ_M_R_S
- VSHLQ_M_R_U VSLIQ_N_S VSLIQ_N_U VSRIQ_N_S VSRIQ_N_U
- VQDMLSDHXQ_S VQDMLSDHQ_S VQDMLADHXQ_S VQDMLADHQ_S
- VMLSDAVAXQ_S VMLSDAVAQ_S VMLADAVAXQ_S
- VCMPGEQ_M_F VCMPGTQ_M_N_F VMLSLDAVQ_P_S VRMLALDAVHAXQ_S
- VMLSLDAVXQ_P_S VFMAQ_F VMLSLDAVAQ_S VQSHRUNBQ_N_S
- VQRSHRUNTQ_N_S VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F
- VDUPQ_M_N_F VCMPGTQ_M_F VCMPLTQ_M_F VRMLSLDAVHQ_P_S
- VQSHRUNTQ_N_S VABSQ_M_F VMAXNMAVQ_P_F VFMAQ_N_F
- VRMLSLDAVHXQ_P_S VREV32Q_M_F VRMLSLDAVHAQ_S
- VRMLSLDAVHAXQ_S VCMPLTQ_M_N_F VCMPNEQ_M_F VRNDAQ_M_F
- VRNDPQ_M_F VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F
- VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F
- VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F VCMLAQ_ROT90_F
- VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F VRNDMQ_M_F
- VCMPLEQ_M_N_F VCMPGEQ_M_N_F VRNDNQ_M_F VMINNMAVQ_P_F
- VCMPNEQ_M_N_F VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S
- VCMPEQQ_M_N_F VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F
- VMLALDAVQ_P_U VMLALDAVQ_P_S VQMOVNBQ_M_S VQMOVNBQ_M_U
- VMOVLTQ_M_U VMOVLTQ_M_S VMOVNBQ_M_U VMOVNBQ_M_S
- VRSHRNTQ_N_U VRSHRNTQ_N_S VORRQ_M_N_S VORRQ_M_N_U
- VREV32Q_M_S VREV32Q_M_U VQRSHRNTQ_N_U VQRSHRNTQ_N_S
- VMOVNTQ_M_U VMOVNTQ_M_S VMOVLBQ_M_U VMOVLBQ_M_S
- VMLALDAVAQ_S VMLALDAVAQ_U VQSHRNBQ_N_U VQSHRNBQ_N_S
- VSHRNBQ_N_U VSHRNBQ_N_S VRSHRNBQ_N_S VRSHRNBQ_N_U
- VMLALDAVXQ_P_U VMLALDAVXQ_P_S VQMOVNTQ_M_U VQMOVNTQ_M_S
- VMVNQ_M_N_U VMVNQ_M_N_S VQSHRNTQ_N_U VQSHRNTQ_N_S
- VMLALDAVAXQ_S VMLALDAVAXQ_U VSHRNTQ_N_S VSHRNTQ_N_U
- VCVTBQ_M_F16_F32 VCVTBQ_M_F32_F16 VCVTTQ_M_F16_F32
- VCVTTQ_M_F32_F16 VCVTMQ_M_S VCVTMQ_M_U VCVTNQ_M_S
- VCVTPQ_M_S VCVTPQ_M_U VCVTQ_M_N_FROM_F_S VCVTNQ_M_U
- VREV16Q_M_S VREV16Q_M_U VREV32Q_M VCVTQ_M_FROM_F_U
- VCVTQ_M_FROM_F_S VRMLALDAVHQ_P_U VADDLVAQ_P_U
- VCVTQ_M_N_FROM_F_U VQSHLUQ_M_N_S VABAVQ_P_S
- VABAVQ_P_U VSHLQ_M_S VSHLQ_M_U VSRIQ_M_N_S
- VSRIQ_M_N_U VSUBQ_M_U VSUBQ_M_S VCVTQ_M_N_TO_F_U
- VCVTQ_M_N_TO_F_S VQADDQ_M_U VQADDQ_M_S
- VRSHRQ_M_N_S VSUBQ_M_N_S VSUBQ_M_N_U VBRSRQ_M_N_S
- VSUBQ_M_N_F VBICQ_M_F VHADDQ_M_U VBICQ_M_U VBICQ_M_S
- VMULQ_M_N_U VHADDQ_M_S VORNQ_M_F VMLAQ_M_N_S VQSUBQ_M_U
- VQSUBQ_M_S VMLAQ_M_N_U VQSUBQ_M_N_U VQSUBQ_M_N_S
- VMULLTQ_INT_M_S VMULLTQ_INT_M_U VMULQ_M_N_S VMULQ_M_N_F
- VMLASQ_M_N_U VMLASQ_M_N_S VMAXQ_M_U VQRDMLAHQ_M_N_U
- VCADDQ_ROT270_M_F VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S
- VQRSHLQ_M_S VMULQ_M_F VRHADDQ_M_U VSHRQ_M_N_U
- VRHADDQ_M_S VMULQ_M_S VMULQ_M_U VQRDMLASHQ_M_N_S
- VRSHLQ_M_S VRSHLQ_M_U VRSHRQ_M_N_U VADDQ_M_N_F
- VADDQ_M_N_S VADDQ_M_N_U VQRDMLASHQ_M_N_U VMAXQ_M_S
- VQRDMLAHQ_M_N_S VORRQ_M_S VORRQ_M_U VORRQ_M_F
- VQRSHLQ_M_U VRMULHQ_M_U VRMULHQ_M_S VMINQ_M_S VMINQ_M_U
- VANDQ_M_F VANDQ_M_U VANDQ_M_S VHSUBQ_M_N_S VHSUBQ_M_N_U
- VMULHQ_M_S VMULHQ_M_U VMULLBQ_INT_M_U
- VMULLBQ_INT_M_S VCADDQ_ROT90_M_F
- VSHRQ_M_N_S VADDQ_M_U VSLIQ_M_N_U
- VQADDQ_M_N_S VBRSRQ_M_N_F VABDQ_M_F VBRSRQ_M_N_U
- VEORQ_M_F VSHLQ_M_N_S VQDMLAHQ_M_N_U VQDMLAHQ_M_N_S
- VSHLQ_M_N_U VMLADAVAQ_P_U VMLADAVAQ_P_S VSLIQ_M_N_S
- VQSHLQ_M_U VQSHLQ_M_S VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S
- VORNQ_M_U VORNQ_M_S VQSHLQ_M_N_S VQSHLQ_M_N_U VADDQ_M_S
- VHADDQ_M_N_S VADDQ_M_F VQADDQ_M_N_U VEORQ_M_S VEORQ_M_U
- VHSUBQ_M_S VHSUBQ_M_U VHADDQ_M_N_U VHCADDQ_ROT90_M_S
- VQRDMLSDHQ_M_S VQRDMLSDHXQ_M_S VQRDMLADHXQ_M_S
- VQDMULHQ_M_S VMLADAVAXQ_P_S VQDMLADHXQ_M_S
- VQRDMULHQ_M_S VMLSDAVAXQ_P_S VQDMULHQ_M_N_S
- VHCADDQ_ROT270_M_S VQDMLSDHQ_M_S VQDMLSDHXQ_M_S
- VMLSDAVAQ_P_S VQRDMLADHQ_M_S VQDMLADHQ_M_S
- VMLALDAVAQ_P_U VMLALDAVAQ_P_S VMLALDAVAXQ_P_U
- VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S VQRSHRNTQ_M_N_S
- VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S VQSHRNTQ_M_N_S
- VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S VRSHRNTQ_M_N_U
- VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S
- VSHRNBQ_M_N_S VSHRNBQ_M_N_U VSHRNTQ_M_N_S VSHRNTQ_M_N_U
- VMLALDAVAXQ_P_S VQRSHRNTQ_M_N_U VQSHRNTQ_M_N_U
- VRSHRNTQ_M_N_S VQRDMULHQ_M_N_S VRMLALDAVHAQ_P_S
- VMLSLDAVAQ_P_S VMLSLDAVAXQ_P_S VMULLBQ_POLY_M_P
- VMULLTQ_POLY_M_P VQDMULLBQ_M_N_S VQDMULLBQ_M_S
- VQDMULLTQ_M_N_S VQDMULLTQ_M_S VQRSHRUNBQ_M_N_S
- VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S VQSHRUNTQ_M_N_S
- VRMLALDAVHAQ_P_U VRMLALDAVHAXQ_P_S VRMLSLDAVHAQ_P_S
- VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S
- VCMLAQ_M_F VCMLAQ_ROT180_M_F VCMLAQ_ROT270_M_F
- VCMLAQ_ROT90_M_F VCMULQ_M_F VCMULQ_ROT180_M_F
- VCMULQ_ROT270_M_F VCMULQ_ROT90_M_F VFMAQ_M_F
- VFMAQ_M_N_F VFMASQ_M_N_F VFMSQ_M_F VMAXNMQ_M_F
- VMINNMQ_M_F VSUBQ_M_F VSTRWQSB_S VSTRWQSB_U
- VSTRBQSO_S VSTRBQSO_U VSTRBQ_S VSTRBQ_U VLDRBQGO_S
- VLDRBQGO_U VLDRBQ_S VLDRBQ_U VLDRWQGB_S VLDRWQGB_U
- VLD1Q_F VLD1Q_S VLD1Q_U VLDRHQ_F VLDRHQGO_S
- VLDRHQGO_U VLDRHQGSO_S VLDRHQGSO_U VLDRHQ_S VLDRHQ_U
- VLDRWQ_F VLDRWQ_S VLDRWQ_U VLDRDQGB_S VLDRDQGB_U
- VLDRDQGO_S VLDRDQGO_U VLDRDQGSO_S VLDRDQGSO_U
- VLDRHQGO_F VLDRHQGSO_F VLDRWQGB_F VLDRWQGO_F
- VLDRWQGO_S VLDRWQGO_U VLDRWQGSO_F VLDRWQGSO_S
- VLDRWQGSO_U VSTRHQ_F VST1Q_S VST1Q_U VSTRHQSO_S
- VSTRHQSO_U VSTRHQSSO_S VSTRHQSSO_U VSTRHQ_S
- VSTRHQ_U VSTRWQ_S VSTRWQ_U VSTRWQ_F VST1Q_F VSTRDQSB_S
- VSTRDQSB_U VSTRDQSO_S VSTRDQSO_U VSTRDQSSO_S
- VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S
- VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F
- VSTRWQSO_F VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ
- VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M
- VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U
- VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U
- VLDRDQGBWB_S VLDRDQGBWB_U VADCQ_U VADCQ_M_U VADCQ_S
- VADCQ_M_S VSBCIQ_U VSBCIQ_S VSBCIQ_M_U VSBCIQ_M_S
- VSBCQ_U VSBCQ_S VSBCQ_M_U VSBCQ_M_S VADCIQ_U VADCIQ_M_U
- VADCIQ_S VADCIQ_M_S VLD2Q VLD4Q VST2Q SRSHRL SRSHR
- URSHR URSHRL SQRSHR UQRSHL UQRSHLL_64 VSHLCQ_M_U
- UQRSHLL_48 SQRSHRL_64 SQRSHRL_48 VSHLCQ_M_S])
-
-(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI")
- (V4SF "V4SI")])
-
-(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
- (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u")
- (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s")
- (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u")
- (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s")
- (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u")
- (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s")
- (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
- (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
- (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
- (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
- (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
- (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
- (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
- (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u")
- (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s")
- (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
- (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
- (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
- (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
- (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
- (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
- (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
- (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
- (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
- (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
- (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
- (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u")
- (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u")
- (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u")
- (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s")
- (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u")
- (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s")
- (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u")
- (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s")
- (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s")
- (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u")
- (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s")
- (VQRSHLQ_U "u") (VQSHLQ_N_S "s") (VQSHLQ_N_U "u")
- (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s")
- (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u")
- (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s")
- (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u")
- (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s")
- (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u")
- (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s")
- (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u")
- (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s")
- (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u")
- (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u")
- (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s")
- (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u")
- (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u")
- (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s")
- (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s")
- (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u")
- (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u")
- (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s")
- (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s")
- (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u")
- (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s")
- (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u")
- (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s")
- (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u")
- (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s")
- (VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u")
- (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s")
- (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U "u")
- (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s")
- (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
- (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
- (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u")
- (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u")
- (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u")
- (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
- (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s")
- (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u")
- (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s")
- (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u")
- (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s")
- (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s")
- (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s")
- (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s")
- (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s")
- (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s")
- (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u")
- (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u")
- (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u")
- (VREV16Q_M_S "s") (VREV16Q_M_U "u")
- (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u")
- (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u")
- (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u")
- (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u")
- (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s")
- (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u")
- (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s")
- (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s")
- (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u")
- (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u")
- (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u")
- (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u")
- (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s")
- (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s")
- (VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u")
- (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u")
- (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s")
- (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u")
- (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s")
- (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s")
- (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u")
- (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u")
- (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u")
- (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u")
- (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s")
- (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u")
- (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u")
- (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s")
- (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s")
- (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u")
- (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u")
- (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u")
- (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u")
- (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u")
- (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s")
- (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u")
- (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u")
- (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u")
- (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s")
- (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s")
- (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u")
- (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s")
- (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u")
- (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u")
- (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s")
- (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
- (VSHRNTQ_M_N_U "u") (VSHRNTQ_M_N_S "s")
- (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u")
- (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u")
- (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u")
- (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u")
- (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s")
- (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S "s")
- (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
- (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
- (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
- (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u")
- (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
- (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
- (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
- (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s")
- (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u")
- (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s")
- (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
- (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s")
- (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u")
- (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s")
- (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
- (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s")
- (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u")
- (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s")
- (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u")
- (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s")
- (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u")
- (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u")
- (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u")
- (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s")
- (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s")
- (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u")
- (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u")
- (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s")
- (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s")
- (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s")
- (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48")
- (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s")
- (VSHLCQ_M_U "u")])
-
-(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32")
- (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16")
- (VCTP32Q_M "32") (VCTP64Q_M "64")])
-(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16")
- (V4SI "mve_imm_32")
- (V8HF "mve_imm_16") (V4SF "mve_imm_32")])
-(define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf")
- (V8HF "Rd") (V4SF "Rf")])
-(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")])
-(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")])
-(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15")
- (V4SI "mve_imm_31")])
-(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")])
-(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")])
-(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")])
-(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")])
-(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")])
-(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")])
-(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h")
- (V4SF "w")])
-(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
- (V8HF "u16") (V4SF "32")])
-
-(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
- (V8HF "=w") (V4SF "=&w")])
-
-(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
-(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
-(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
-(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
-(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
-(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
-(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S])
-(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
-(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
-(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
-(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
-(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
-(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S])
-(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U])
-(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U])
-(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U])
-(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S])
-(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q])
-(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M])
-(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U])
-(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S])
-(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U])
-(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U])
-(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U])
-(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S])
-(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U])
-(define_int_iterator VABDQ [VABDQ_S VABDQ_U])
-(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
-(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
-(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
-(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
-(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
-(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
-(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
-(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
-(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
-(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
-(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
-(define_int_iterator VEORQ [VEORQ_U VEORQ_S])
-(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U])
-(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S])
-(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U])
-(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S])
-(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S])
-(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S])
-(define_int_iterator VMINQ [VMINQ_S VMINQ_U])
-(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S])
-(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S])
-(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U])
-(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S])
-(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S])
-(define_int_iterator VMULQ [VMULQ_U VMULQ_S])
-(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S])
-(define_int_iterator VORNQ [VORNQ_U VORNQ_S])
-(define_int_iterator VORRQ [VORRQ_S VORRQ_U])
-(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S])
-(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U])
-(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U])
-(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U])
-(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U])
-(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U])
-(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S])
-(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S])
-(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U])
-(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U])
-(define_int_iterator VRMULHQ [VRMULHQ_S VRMULHQ_U])
-(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U])
-(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S])
-(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U])
-(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S])
-(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U])
-(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U])
-(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U])
-(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U])
-(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U])
-(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S])
-(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S])
-(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S])
-(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U])
-(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S])
-(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S])
-(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S])
-(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U])
-(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S])
-(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S])
-(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U])
-(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U])
-(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U])
-(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S])
-(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U])
-(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U])
-(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U])
-(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U])
-(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U])
-(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U])
-(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U])
-(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U])
-(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U])
-(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U])
-(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U])
-(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U])
-(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U])
-(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U])
-(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U])
-(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U])
-(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
-(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
-(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U])
-(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U])
-(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U])
-(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U])
-(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U])
-(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U])
-(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U])
-(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U])
-(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U])
-(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U])
-(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S])
-(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U])
-(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S])
-(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S])
-(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S])
-(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U])
-(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U])
-(define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U])
-(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S])
-(define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S])
-(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S])
-(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U])
-(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S])
-(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S])
-(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U])
-(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S])
-(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S])
-(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S])
-(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S])
-(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U])
-(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U])
-(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U])
-(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U])
-(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U])
-(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U])
-(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S])
-(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U])
-(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S])
-(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U])
-(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U])
-(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U])
-(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S])
-(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S])
-(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U])
-(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S])
-(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U])
-(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U])
-(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S])
-(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U])
-(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U])
-(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U])
-(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U])
-(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U])
-(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S])
-(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S])
-(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U])
-(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S])
-(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S])
-(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S])
-(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S])
-(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S])
-(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S])
-(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S])
-(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U])
-(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S])
-(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S])
-(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S])
-(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U])
-(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U])
-(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S])
-(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S])
-(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S])
-(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S])
-(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S])
-(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S])
-(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U])
-(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U])
-(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U])
-(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U])
-(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U])
-(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U])
-(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U])
-(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U])
-(define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S])
-(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U VMLALDAVAXQ_P_S])
-(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S])
-(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U])
-(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S])
-(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U])
-(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S])
-(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S])
-(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S])
-(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S])
-(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U])
-(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
-(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U])
-(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U])
-(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U])
-(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
-(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U])
-(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
-(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U])
-(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
-(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U])
-(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U])
-(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U])
-(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U])
-(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
-(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
-(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
-(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
-(define_int_iterator VST1Q [VST1Q_S VST1Q_U])
-(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U])
-(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U])
-(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U])
-(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U])
-(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
-(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U])
-(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U])
-(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U])
-(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U])
-(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U])
-(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
-(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
-(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
-(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S])
-(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S])
-(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S])
-(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S])
-(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S])
-(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S])
-(define_int_iterator VADCQ [VADCQ_U VADCQ_S])
-(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S])
-(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
-(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
-(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
-
(define_insn "*mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w")
(match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))]
@@ -1977,15 +1329,25 @@
;;
;; [vmaxq_u, vmaxq_s])
;;
-(define_insn "mve_vmaxq_<supf><mode>"
+(define_insn "mve_vmaxq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:MVE_2 2 "s_register_operand" "w")]
- VMAXQ))
+ (smax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
+ ]
+ "TARGET_HAVE_MVE"
+ "vmax.%#<V_s_elem>\t%q0, %q1, %q2"
+ [(set_attr "type" "mve_move")
+])
+
+(define_insn "mve_vmaxq_u<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (umax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE"
- "vmax.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
+ "vmax.%#<V_u_elem>\t%q0, %q1, %q2"
[(set_attr "type" "mve_move")
])
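
Note on this hunk and the matching vminq one below: replacing the VMAXQ/VMINQ unspecs with the generic smax/umax (and smin/umin) RTX codes lets the middle end reason about these instructions as ordinary max/min operations. A hedged illustration of the kind of C this makes reachable on an MVE target (example mine, not part of the patch):

  #include <stdint.h>

  /* With vmaxq exposed as smax, generic max expansion can select
     VMAX.S8 for this loop at -O2 on an MVE target (illustrative).  */
  void
  elem_max (int8_t *r, const int8_t *a, const int8_t *b, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = a[i] > b[i] ? a[i] : b[i];
  }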
@@ -2037,15 +1399,25 @@
;;
;; [vminq_s, vminq_u])
;;
-(define_insn "mve_vminq_<supf><mode>"
+(define_insn "mve_vminq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:MVE_2 2 "s_register_operand" "w")]
- VMINQ))
+ (smin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
+ ]
+ "TARGET_HAVE_MVE"
+ "vmin.%#<V_s_elem>\t%q0, %q1, %q2"
+ [(set_attr "type" "mve_move")
+])
+
+(define_insn "mve_vminq_u<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (umin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE"
- "vmin.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
+ "vmin.%#<V_u_elem>\t%q0, %q1, %q2"
[(set_attr "type" "mve_move")
])
@@ -2199,6 +1571,17 @@
[(set_attr "type" "mve_move")
])
+(define_insn "mve_vmulq<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (mult:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
+ ]
+ "TARGET_HAVE_MVE"
+ "vmul.i%#<V_sz_elem>\t%q0, %q1, %q2"
+ [(set_attr "type" "mve_move")
+])
+
;;
;; [vornq_u, vornq_s])
;;
@@ -2574,6 +1957,17 @@
[(set_attr "type" "mve_move")
])
+(define_insn "mve_vsubq<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (minus:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
+ ]
+ "TARGET_HAVE_MVE"
+ "vsub.i%#<V_sz_elem>\t%q0, %q1, %q2"
+ [(set_attr "type" "mve_move")
+])
+
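The new mve_vmulq<mode> and mve_vsubq<mode> patterns (and the float variants further down) likewise use the generic mult/minus codes, in parallel with the existing unspec-based intrinsic patterns. One thing this enables is lowering GCC's generic vector operators straight to MVE instructions; a sketch assuming an MVE-enabled invocation (example mine):

  #include <arm_mve.h>

  /* Generic vector operators can map onto the new mult/minus
     define_insns: vmul.i32 followed by vsub.i32 (illustrative).  */
  int32x4_t
  mul_sub (int32x4_t a, int32x4_t b, int32x4_t c)
  {
    return a * b - c;
  }
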
;;
;; [vabdq_f])
;;
@@ -3030,9 +2424,8 @@
(define_insn "mve_vmaxnmq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VMAXNMQ_F))
+ (smax:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+ (match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vmaxnm.f%#<V_sz_elem> %q0, %q1, %q2"
@@ -3090,9 +2483,8 @@
(define_insn "mve_vminnmq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VMINNMQ_F))
+ (smin:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+ (match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vminnm.f%#<V_sz_elem> %q0, %q1, %q2"
@@ -3210,9 +2602,8 @@
(define_insn "mve_vmulq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VMULQ_F))
+ (mult:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+ (match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vmul.f%#<V_sz_elem> %q0, %q1, %q2"
@@ -3480,9 +2871,8 @@
(define_insn "mve_vsubq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VSUBQ_F))
+ (minus:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+ (match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vsub.f%#<V_sz_elem>\t%q0, %q1, %q2"
@@ -4310,7 +3700,7 @@
(set_attr "length""8")])
;;
-;; [vqdmlahq_n_s, vqdmlahq_n_u])
+;; [vqdmlahq_n_s])
;;
(define_insn "mve_vqdmlahq_n_<supf><mode>"
[
@@ -4326,6 +3716,22 @@
])
;;
+;; [vqdmlashq_n_s])
+;;
+(define_insn "mve_vqdmlashq_n_<supf><mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
+ (match_operand:MVE_2 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")]
+ VQDMLASHQ_N))
+ ]
+ "TARGET_HAVE_MVE"
+ "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+])
+
+;;
;; [vqnegq_m_s])
;;
(define_insn "mve_vqnegq_m_s<mode>"
@@ -4374,7 +3780,7 @@
])
;;
-;; [vqrdmlahq_n_s, vqrdmlahq_n_u])
+;; [vqrdmlahq_n_s])
;;
(define_insn "mve_vqrdmlahq_n_<supf><mode>"
[
@@ -4390,7 +3796,7 @@
])
;;
-;; [vqrdmlashq_n_s, vqrdmlashq_n_u])
+;; [vqrdmlashq_n_s])
;;
(define_insn "mve_vqrdmlashq_n_<supf><mode>"
[
@@ -6552,6 +5958,23 @@
(set_attr "length""8")])
;;
+;; [vqdmlashq_m_n_s])
+;;
+(define_insn "mve_vqdmlashq_m_n_s<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
+ (match_operand:MVE_2 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VQDMLASHQ_M_N_S))
+ ]
+ "TARGET_HAVE_MVE"
+ "vpst\;vqdmlasht.s%#<V_sz_elem>\t%q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
;; [vqrdmlahq_m_n_s])
;;
(define_insn "mve_vqrdmlahq_m_n_s<mode>"
@@ -7113,7 +6536,7 @@
(set_attr "length""8")])
;;
-;; [vmlaldavaxq_p_u, vmlaldavaxq_p_s])
+;; [vmlaldavaxq_p_s])
;;
(define_insn "mve_vmlaldavaxq_p_<supf><mode>"
[
@@ -10315,38 +9738,10 @@
[(set_attr "type" "mve_move")
(set_attr "length""8")])
-(define_expand "mve_vstrwq_scatter_base_wb_<supf>v4si"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "w")
- (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (ignore_wb, operands[0],
- operands[1], operands[2]));
- DONE;
-})
-
-(define_expand "mve_vstrwq_scatter_base_wb_add_<supf>v4si"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "0")
- (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (operands[0], operands[2],
- operands[1], ignore_vec));
- DONE;
-})
-
;;
-;; [vstrwq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
+;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si_insn"
+(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V4SI 1 "s_register_operand" "0")
@@ -10368,42 +9763,10 @@
}
[(set_attr "length" "4")])
-(define_expand "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "w")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (ignore_wb, operands[0],
- operands[1], operands[2],
- operands[3]));
- DONE;
-})
-
-(define_expand "mve_vstrwq_scatter_base_wb_p_add_<supf>v4si"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "0")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (operands[0], operands[2],
- operands[1], ignore_vec,
- operands[3]));
- DONE;
-})
-
;;
;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn"
+(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V4SI 1 "s_register_operand" "0")
@@ -10426,38 +9789,10 @@
}
[(set_attr "length" "8")])
-(define_expand "mve_vstrwq_scatter_base_wb_fv4sf"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SF 2 "s_register_operand" "w")
- (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ignore_wb = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (ignore_wb,operands[0],
- operands[1], operands[2]));
- DONE;
-})
-
-(define_expand "mve_vstrwq_scatter_base_wb_add_fv4sf"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "0")
- (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ignore_vec = gen_reg_rtx (V4SFmode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (operands[0], operands[2],
- operands[1], ignore_vec));
- DONE;
-})
-
;;
;; [vstrwq_scatter_base_wb_f]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_fv4sf_insn"
+(define_insn "mve_vstrwq_scatter_base_wb_fv4sf"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V4SI 1 "s_register_operand" "0")
@@ -10479,42 +9814,10 @@
}
[(set_attr "length" "4")])
-(define_expand "mve_vstrwq_scatter_base_wb_p_fv4sf"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SF 2 "s_register_operand" "w")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ignore_wb = gen_reg_rtx (V4SImode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (ignore_wb, operands[0],
- operands[1], operands[2],
- operands[3]));
- DONE;
-})
-
-(define_expand "mve_vstrwq_scatter_base_wb_p_add_fv4sf"
- [(match_operand:V4SI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 2 "s_register_operand" "0")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ignore_vec = gen_reg_rtx (V4SFmode);
- emit_insn (
- gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (operands[0], operands[2],
- operands[1], ignore_vec,
- operands[3]));
- DONE;
-})
-
;;
;; [vstrwq_scatter_base_wb_p_f]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf_insn"
+(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V4SI 1 "s_register_operand" "0")
@@ -10537,38 +9840,10 @@
}
[(set_attr "length" "8")])
-(define_expand "mve_vstrdq_scatter_base_wb_<supf>v2di"
- [(match_operand:V2DI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 2 "s_register_operand" "w")
- (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (V2DImode);
- emit_insn (
- gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (ignore_wb, operands[0],
- operands[1], operands[2]));
- DONE;
-})
-
-(define_expand "mve_vstrdq_scatter_base_wb_add_<supf>v2di"
- [(match_operand:V2DI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 2 "s_register_operand" "0")
- (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (V2DImode);
- emit_insn (
- gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (operands[0], operands[2],
- operands[1], ignore_vec));
- DONE;
-})
-
;;
;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
;;
-(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di_insn"
+(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V2DI 1 "s_register_operand" "0")
@@ -10590,42 +9865,10 @@
}
[(set_attr "length" "4")])
-(define_expand "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
- [(match_operand:V2DI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 2 "s_register_operand" "w")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (V2DImode);
- emit_insn (
- gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (ignore_wb, operands[0],
- operands[1], operands[2],
- operands[3]));
- DONE;
-})
-
-(define_expand "mve_vstrdq_scatter_base_wb_p_add_<supf>v2di"
- [(match_operand:V2DI 0 "s_register_operand" "=w")
- (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 2 "s_register_operand" "0")
- (match_operand:HI 3 "vpr_register_operand")
- (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (V2DImode);
- emit_insn (
- gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (operands[0], operands[2],
- operands[1], ignore_vec,
- operands[3]));
- DONE;
-})
-
;;
;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
;;
-(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn"
+(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:V2DI 1 "s_register_operand" "0")
@@ -10643,7 +9886,7 @@
ops[0] = operands[1];
ops[1] = operands[2];
ops[2] = operands[3];
- output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]!",ops);
+ output_asm_insn ("vpst;vstrdt.u64\t%q2, [%q0, %1]!",ops);
return "";
}
[(set_attr "length" "8")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 3e7b51d..2d76769 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -501,7 +501,7 @@
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -509,54 +509,11 @@
(const_string "neon_add<q>")))]
)
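
ARM_HAVE_NEON_<MODE>_ARITH is a new family of per-mode predicates added to arm.h in this merge; it centralises the conditions previously open-coded in each pattern (integer modes under plain TARGET_NEON, float modes additionally gated on -funsafe-math-optimizations). A sketch of their shape, assuming the names mirror the pattern here; the authoritative definitions are in arm.h:

  #define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON
  #define ARM_HAVE_NEON_V4SF_ARITH \
    (TARGET_NEON && flag_unsafe_math_optimizations)
  #define ARM_HAVE_NEON_V8HF_ARITH \
    (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)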
-;; As with SFmode, full support for HFmode vector arithmetic is only available
-;; when flag-unsafe-math-optimizations is enabled.
-
-;; Add pattern with modes V8HF and V4HF is split into separate patterns to add
-;; support for standard pattern addv8hf3 in MVE. Following pattern is called
-;; from "addv8hf3" standard pattern inside vec-common.md file.
-
-(define_insn "addv8hf3_neon"
- [(set
- (match_operand:V8HF 0 "s_register_operand" "=w")
- (plus:V8HF
- (match_operand:V8HF 1 "s_register_operand" "w")
- (match_operand:V8HF 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_addsub_s_q")]
-)
-
-(define_insn "addv4hf3"
- [(set
- (match_operand:V4HF 0 "s_register_operand" "=w")
- (plus:V4HF
- (match_operand:V4HF 1 "s_register_operand" "w")
- (match_operand:V4HF 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_addsub_s_q")]
-)
-
-(define_insn "add<mode>3_fp16"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (plus:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST"
- "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set (attr "type")
- (if_then_else (match_test "<Is_float_mode>")
- (const_string "neon_fp_addsub_s<q>")
- (const_string "neon_add<q>")))]
-)
-
(define_insn "*sub<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -564,33 +521,11 @@
(const_string "neon_sub<q>")))]
)
-(define_insn "sub<mode>3"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (minus:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_sub<q>")]
-)
-
-(define_insn "sub<mode>3_fp16"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (minus:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST"
- "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_sub<q>")]
-)
-
(define_insn "*mul<mode>3_neon"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
(match_operand:VDQW 2 "s_register_operand" "w")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -635,7 +570,7 @@
(plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
(match_operand:VDQW 3 "s_register_operand" "w"))
(match_operand:VDQW 1 "s_register_operand" "0")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -648,7 +583,7 @@
(plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
(match_operand:VH 3 "s_register_operand" "w"))
(match_operand:VH 1 "s_register_operand" "0")))]
- "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -658,7 +593,7 @@
(minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
(mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
(match_operand:VDQW 3 "s_register_operand" "w"))))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -676,7 +611,7 @@
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
- "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
"vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -697,18 +632,7 @@
(match_operand:VH 1 "register_operand" "w")
(match_operand:VH 2 "register_operand" "w")
(match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_mla_s<q>")]
-)
-
-(define_insn "fma<VH:mode>4_intrinsic"
- [(set (match_operand:VH 0 "register_operand" "=w")
- (fma:VH
- (match_operand:VH 1 "register_operand" "w")
- (match_operand:VH 2 "register_operand" "w")
- (match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -718,7 +642,7 @@
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
- "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
"vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -1238,7 +1162,7 @@
(parallel [(const_int 0) (const_int 1)]))
(vec_select:V2SF (match_dup 1)
(parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_V4SF_ARITH"
"<VQH_mnem>.f32\t%P0, %e1, %f1"
[(set_attr "vqh_mnem" "<VQH_mnem>")
(set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
@@ -1305,7 +1229,7 @@
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
neon_pairwise_reduce (vec, operands[1], <MODE>mode,
@@ -1318,8 +1242,7 @@
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1354,7 +1277,7 @@
(define_expand "reduc_smin_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
@@ -1368,8 +1291,7 @@
(define_expand "reduc_smin_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1382,7 +1304,7 @@
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
neon_pairwise_reduce (vec, operands[1], <MODE>mode,
@@ -1395,8 +1317,7 @@
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1573,6 +1494,30 @@
[(set_attr "type" "neon_qsub<q>")]
)
+(define_expand "vec_cmp<mode><v_cmp_result>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+ (match_operator:<V_cmp_result> 1 "comparison_operator"
+ [(match_operand:VDQW 2 "s_register_operand")
+ (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand")
+ (match_operator:VDQIW 1 "comparison_operator"
+ [(match_operand:VDQIW 2 "s_register_operand")
+ (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
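These two standard-named expanders (vec_cmp for signed/float, vec_cmpu for unsigned) expose NEON's mask-producing comparisons to the vectorizer, delegating to the new arm_expand_vector_compare helper in arm.c. The kind of loop they serve (example mine):

  #include <stdint.h>

  /* Each result lane is the all-ones/all-zeros mask that vcgt.s32
     produces (illustrative; assumes NEON enabled at -O3).  */
  void
  mask_gt (int32_t *r, const int32_t *a, const int32_t *b, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = a[i] > b[i] ? -1 : 0;
  }
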
;; Conditional instructions. These are comparisons with conditional moves for
;; vectors. They perform the assignment:
;;
@@ -1586,230 +1531,53 @@
(if_then_else:VDQW
(match_operator 3 "comparison_operator"
[(match_operand:VDQW 4 "s_register_operand")
- (match_operand:VDQW 5 "nonmemory_operand")])
+ (match_operand:VDQW 5 "reg_or_zero_operand")])
(match_operand:VDQW 1 "s_register_operand")
(match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
- int inverse = 0;
- int use_zero_form = 0;
- int swap_bsl_operands = 0;
- rtx mask = gen_reg_rtx (<V_cmp_result>mode);
- rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
-
- rtx (*base_comparison) (rtx, rtx, rtx);
- rtx (*complimentary_comparison) (rtx, rtx, rtx);
-
- switch (GET_CODE (operands[3]))
- {
- case GE:
- case GT:
- case LE:
- case LT:
- case EQ:
- if (operands[5] == CONST0_RTX (<MODE>mode))
- {
- use_zero_form = 1;
- break;
- }
- /* Fall through. */
- default:
- if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case UNLT:
- inverse = 1;
- /* Fall through. */
- case GE:
- case UNGE:
- case ORDERED:
- case UNORDERED:
- base_comparison = gen_neon_vcge<mode>;
- complimentary_comparison = gen_neon_vcgt<mode>;
- break;
- case LE:
- case UNLE:
- inverse = 1;
- /* Fall through. */
- case GT:
- case UNGT:
- base_comparison = gen_neon_vcgt<mode>;
- complimentary_comparison = gen_neon_vcge<mode>;
- break;
- case EQ:
- case NE:
- case UNEQ:
- base_comparison = gen_neon_vceq<mode>;
- complimentary_comparison = gen_neon_vceq<mode>;
- break;
- default:
- gcc_unreachable ();
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case LE:
- case GT:
- case GE:
- case EQ:
- /* The easy case. Here we emit one of vcge, vcgt or vceq.
- As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
- a GE b -> a GE b
- a GT b -> a GT b
- a LE b -> b GE a
- a LT b -> b GT a
- a EQ b -> a EQ b
- Note that there also exist direct comparison against 0 forms,
- so catch those as a special case. */
- if (use_zero_form)
- {
- inverse = 0;
- switch (GET_CODE (operands[3]))
- {
- case LT:
- base_comparison = gen_neon_vclt<mode>;
- break;
- case LE:
- base_comparison = gen_neon_vcle<mode>;
- break;
- default:
- /* Do nothing, other zero form cases already have the correct
- base_comparison. */
- break;
- }
- }
-
- if (!inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
- break;
- case UNLT:
- case UNLE:
- case UNGT:
- case UNGE:
- case NE:
- /* Vector compare returns false for lanes which are unordered, so if we use
- the inverse of the comparison we actually want to emit, then
- swap the operands to BSL, we will end up with the correct result.
- Note that a NE NaN and NaN NE b are true for all a, b.
-
- Our transformations are:
- a GE b -> !(b GT a)
- a GT b -> !(b GE a)
- a LE b -> !(a GT b)
- a LT b -> !(a GE b)
- a NE b -> !(a EQ b) */
-
- if (inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
-
- swap_bsl_operands = 1;
- break;
- case UNEQ:
- /* We check (a > b || b > a). combining these comparisons give us
- true iff !(a != b && a ORDERED b), swapping the operands to BSL
- will then give us (a == b || a UNORDERED b) as intended. */
-
- emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
- emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
- swap_bsl_operands = 1;
- break;
- case UNORDERED:
- /* Operands are ORDERED iff (a > b || b >= a).
- Swapping the operands to BSL will give the UNORDERED case. */
- swap_bsl_operands = 1;
- /* Fall through. */
- case ORDERED:
- emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
- emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
- break;
- default:
- gcc_unreachable ();
- }
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
- if (swap_bsl_operands)
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+(define_expand "vcond<V_cvtto><mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
+ (if_then_else:<V_CVTTO>
+ (match_operator 3 "comparison_operator"
+ [(match_operand:V32 4 "s_register_operand")
+ (match_operand:V32 5 "reg_or_zero_operand")])
+ (match_operand:<V_CVTTO> 1 "s_register_operand")
+ (match_operand:<V_CVTTO> 2 "s_register_operand")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vcond (operands, <V_cmp_result>mode);
DONE;
})
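
The open-coded selection among vcge/vcgt/vceq and the vbsl operand-swapping above collapse into a single arm_expand_vcond helper in arm.c; the remaining expanders simply forward to it. For reference, the compare-and-select source shape these patterns implement (example mine; float variants still require -funsafe-math-optimizations):

  void
  select_gt (float *r, const float *a, const float *b, int n)
  {
    for (int i = 0; i < n; i++)
      r[i] = a[i] > b[i] ? a[i] : b[i];
  }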
-(define_expand "vcondu<mode><mode>"
- [(set (match_operand:VDQIW 0 "s_register_operand")
- (if_then_else:VDQIW
+(define_expand "vcondu<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
(match_operator 3 "arm_comparison_operator"
- [(match_operand:VDQIW 4 "s_register_operand")
- (match_operand:VDQIW 5 "s_register_operand")])
- (match_operand:VDQIW 1 "s_register_operand")
- (match_operand:VDQIW 2 "s_register_operand")))]
+ [(match_operand:<V_cmp_result> 4 "s_register_operand")
+ (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON"
{
- rtx mask;
- int inverse = 0, immediate_zero = 0;
-
- mask = gen_reg_rtx (<V_cmp_result>mode);
-
- if (operands[5] == CONST0_RTX (<MODE>mode))
- immediate_zero = 1;
- else if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
-
- switch (GET_CODE (operands[3]))
- {
- case GEU:
- emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
- break;
-
- case GTU:
- emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
- break;
-
- case EQ:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
- break;
-
- case LEU:
- if (immediate_zero)
- emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
- else
- emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
- break;
-
- case LTU:
- if (immediate_zero)
- emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
- else
- emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
- break;
-
- case NE:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
- inverse = 1;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (inverse)
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
+ (match_operand:<V_cmp_result> 3 "s_register_operand")
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
+ operands[2]));
DONE;
})
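
vcond_mask is the select-only half of vcond: the mask is already computed, so the expander maps directly onto VBSL. VBSL is a bitwise select, which is exact lane selection when every mask lane is all-ones or all-zeros; a scalar model of one lane:

  #include <stdint.h>

  /* Bits set in mask come from a, the rest from b.  */
  static inline uint32_t
  bsl (uint32_t mask, uint32_t a, uint32_t b)
  {
    return (mask & a) | (~mask & b);
  }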
@@ -1823,7 +1591,7 @@
(match_operand:VCVTF 2 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
else
emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
@@ -1837,7 +1605,7 @@
(match_operand:VH 2 "s_register_operand")]
"TARGET_NEON_FP16INST"
{
- emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
@@ -1847,7 +1615,7 @@
(match_operand:VH 2 "s_register_operand")]
"TARGET_NEON_FP16INST"
{
- emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
@@ -1942,17 +1710,6 @@
(const_string "neon_mul_<V_elem_ch><q>")))]
)
-(define_insn "mul<mode>3"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (mult:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
-)
-
(define_insn "neon_vmulf<mode>"
[(set
(match_operand:VH 0 "s_register_operand" "=w")
@@ -1971,7 +1728,7 @@
(match_operand:VDQW 3 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
operands[2], operands[3]));
else
@@ -1999,8 +1756,8 @@
(match_operand:VH 3 "s_register_operand")]
"TARGET_NEON_FP16INST"
{
- emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
- operands[1]));
+ emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
+ operands[1]));
DONE;
})
@@ -2462,7 +2219,7 @@
(match_operand:VDQW 3 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
operands[1], operands[2], operands[3]));
else
@@ -2569,7 +2326,7 @@
(match_operand:VCVTF 2 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
else
emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
@@ -2644,7 +2401,7 @@
;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
@@ -2684,7 +2441,7 @@
}
)
-(define_insn "neon_vc<cmp_op><mode>_insn"
+(define_insn "@neon_vc<cmp_op><mode>_insn"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
@@ -2728,7 +2485,7 @@
[(set_attr "type" "neon_fp_compare_s<q>")]
)
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VH
@@ -2794,7 +2551,7 @@
}
[(set_attr "type" "neon_fp_compare_s<q>")])
-(define_insn "neon_vc<cmp_op>u<mode>"
+(define_insn "@neon_vc<code><mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(neg:<V_cmp_result>
(GTUGEU:<V_cmp_result>
@@ -4751,7 +4508,7 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_bsl<q>")]
)
-(define_expand "neon_vbsl<mode>"
+(define_expand "@neon_vbsl<mode>"
[(set (match_operand:VDQX 0 "s_register_operand")
(unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
(match_operand:VDQX 2 "s_register_operand")
@@ -6658,7 +6415,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:VF 0 "s_register_operand" "=w")
(abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
(match_operand:VF 2 "s_register_operand" "w"))))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_abd_s<q>")]
)
@@ -6668,7 +6425,7 @@ if (BYTES_BIG_ENDIAN)
(abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
(match_operand:VF 2 "s_register_operand" "w")]
UNSPEC_VSUB)))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_abd_s<q>")]
)
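
A hedged illustration (not part of the patch) of the kind of source the reworked vcond expanders now handle through arm_expand_vcond; the function below is made up for illustration only:

/* With auto-vectorization enabled on a Neon target, this conditional
   select is expanded via the vcond patterns that the hunks above
   funnel through arm_expand_vcond.  */
void
csel (int *restrict r, const int *restrict a, const int *restrict b,
      const int *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = a[i] > b[i] ? a[i] : c[i];
}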
diff --git a/gcc/config/arm/parsecpu.awk b/gcc/config/arm/parsecpu.awk
index 7fc3754..9423e8a 100644
--- a/gcc/config/arm/parsecpu.awk
+++ b/gcc/config/arm/parsecpu.awk
@@ -190,6 +190,23 @@ function gen_isa () {
ORS = z
print "\n"
}
+
+ print "struct fbit_implication {"
+ print " /* Represents a feature implication, where:"
+ print " ante IMPLIES cons"
+ print " meaning that if ante is enabled then we should"
+ print " also implicitly enable cons. */"
+ print " enum isa_feature ante;"
+ print " enum isa_feature cons;"
+ print "};\n"
+ print "static const struct fbit_implication all_implied_fbits[] ="
+ print "{"
+ for (impl in implied_bits) {
+ split (impl, impl_parts, SUBSEP)
+ print " { isa_bit_" impl_parts[2] ", isa_bit_" impl_parts[1] " },"
+ }
+ print " { isa_nobit, isa_nobit }"
+ print "};\n"
}
function gen_data () {
@@ -600,6 +617,40 @@ BEGIN {
parse_ok = 1
}
+/^define implied / {
+ if (NF < 4) fatal("syntax: define implied <name> [<feature-or-fgroup>]+\n" \
+ "Implied bits must be defined with at least one antecedent.")
+ toplevel()
+ fbit = $3
+ if (fbit in features) fatal("implied feature " fbit " aliases a real feature")
+ if (fbit in fgroup) fatal("implied feature " fbit " aliases a feature group")
+ fcount = NF
+ features[fbit] = 1
+ for (n = 4; n <= fcount; n++) {
+ ante = $n
+ if (fbit == ante) fatal("feature cannot imply itself")
+ else if (ante in features) {
+ for (impl in implied_bits) {
+ split(impl, impl_sep, SUBSEP)
+ if (ante == impl_sep[1])
+ fatal(ante " implies implied bit " fbit \
+ ". Chained implications not currently supported")
+ }
+ implied_bits[fbit, ante] = 1
+ } else if (ante in fgroup) {
+ for (bitcomb in fgrp_bits) {
+ split(bitcomb, bitsep, SUBSEP)
+ if (bitsep[1] == ante) {
+ implied_bits[fbit, bitsep[2]] = 1
+ }
+ }
+ } else {
+ fatal("implied bit antecedent " ante " unrecognized")
+ }
+ }
+ parse_ok = 1
+}
+
/^begin fpu / {
if (NF != 3) fatal("syntax: begin fpu <name>")
toplevel()
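
A minimal sketch only: the generated all_implied_fbits table above is terminated by { isa_nobit, isa_nobit }, so a consumer can walk it and propagate antecedents into an ISA bitmap. apply_implied_fbits is a hypothetical name; only fbit_implication, all_implied_fbits and isa_nobit come from the generated code, and the bitmap calls assume GCC's sbitmap API:

/* Hypothetical consumer of the generated all_implied_fbits table.  */
static void
apply_implied_fbits (sbitmap isa)
{
  for (const struct fbit_implication *imp = all_implied_fbits;
       imp->ante != isa_nobit; ++imp)
    if (bitmap_bit_p (isa, imp->ante))   /* antecedent enabled?  */
      bitmap_set_bit (isa, imp->cons);   /* then imply the consequent.  */
}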
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 0a2399d..a3844e9 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -519,3 +519,803 @@
UNSPEC_BFMAB
UNSPEC_BFMAT
])
+
+;; Enumerators for MVE unspecs.
+(define_c_enum "unspec" [
+ VST4Q
+ VRNDXQ_F
+ VRNDQ_F
+ VRNDPQ_F
+ VRNDNQ_F
+ VRNDMQ_F
+ VRNDAQ_F
+ VREV64Q_F
+ VNEGQ_F
+ VDUPQ_N_F
+ VABSQ_F
+ VREV32Q_F
+ VCVTTQ_F32_F16
+ VCVTBQ_F32_F16
+ VCVTQ_TO_F_S
+ VQNEGQ_S
+ VCVTQ_TO_F_U
+ VREV16Q_S
+ VREV16Q_U
+ VADDLVQ_S
+ VMVNQ_N_S
+ VMVNQ_N_U
+ VCVTAQ_S
+ VCVTAQ_U
+ VREV64Q_S
+ VREV64Q_U
+ VQABSQ_S
+ VNEGQ_S
+ VMVNQ_S
+ VMVNQ_U
+ VDUPQ_N_U
+ VDUPQ_N_S
+ VCLZQ_U
+ VCLZQ_S
+ VCLSQ_S
+ VADDVQ_S
+ VADDVQ_U
+ VABSQ_S
+ VREV32Q_U
+ VREV32Q_S
+ VMOVLTQ_U
+ VMOVLTQ_S
+ VMOVLBQ_S
+ VMOVLBQ_U
+ VCVTQ_FROM_F_S
+ VCVTQ_FROM_F_U
+ VCVTPQ_S
+ VCVTPQ_U
+ VCVTNQ_S
+ VCVTNQ_U
+ VCVTMQ_S
+ VCVTMQ_U
+ VADDLVQ_U
+ VCTP8Q
+ VCTP16Q
+ VCTP32Q
+ VCTP64Q
+ VPNOT
+ VCREATEQ_F
+ VCVTQ_N_TO_F_S
+ VCVTQ_N_TO_F_U
+ VBRSRQ_N_F
+ VSUBQ_N_F
+ VCREATEQ_U
+ VCREATEQ_S
+ VSHRQ_N_S
+ VSHRQ_N_U
+ VCVTQ_N_FROM_F_S
+ VCVTQ_N_FROM_F_U
+ VADDLVQ_P_S
+ VADDLVQ_P_U
+ VCMPNEQ_U
+ VCMPNEQ_S
+ VSHLQ_S
+ VSHLQ_U
+ VABDQ_S
+ VADDQ_N_S
+ VADDVAQ_S
+ VADDVQ_P_S
+ VANDQ_S
+ VBICQ_S
+ VBRSRQ_N_S
+ VCADDQ_ROT270_S
+ VCADDQ_ROT90_S
+ VCMPEQQ_S
+ VCMPEQQ_N_S
+ VCMPNEQ_N_S
+ VEORQ_S
+ VHADDQ_S
+ VHADDQ_N_S
+ VHSUBQ_S
+ VHSUBQ_N_S
+ VMAXQ_S
+ VMAXVQ_S
+ VMINQ_S
+ VMINVQ_S
+ VMLADAVQ_S
+ VMULHQ_S
+ VMULLBQ_INT_S
+ VMULLTQ_INT_S
+ VMULQ_S
+ VMULQ_N_S
+ VORNQ_S
+ VORRQ_S
+ VQADDQ_S
+ VQADDQ_N_S
+ VQRSHLQ_S
+ VQRSHLQ_N_S
+ VQSHLQ_S
+ VQSHLQ_N_S
+ VQSHLQ_R_S
+ VQSUBQ_S
+ VQSUBQ_N_S
+ VRHADDQ_S
+ VRMULHQ_S
+ VRSHLQ_S
+ VRSHLQ_N_S
+ VRSHRQ_N_S
+ VSHLQ_N_S
+ VSHLQ_R_S
+ VSUBQ_S
+ VSUBQ_N_S
+ VABDQ_U
+ VADDQ_N_U
+ VADDVAQ_U
+ VADDVQ_P_U
+ VANDQ_U
+ VBICQ_U
+ VBRSRQ_N_U
+ VCADDQ_ROT270_U
+ VCADDQ_ROT90_U
+ VCMPEQQ_U
+ VCMPEQQ_N_U
+ VCMPNEQ_N_U
+ VEORQ_U
+ VHADDQ_U
+ VHADDQ_N_U
+ VHSUBQ_U
+ VHSUBQ_N_U
+ VMAXQ_U
+ VMAXVQ_U
+ VMINQ_U
+ VMINVQ_U
+ VMLADAVQ_U
+ VMULHQ_U
+ VMULLBQ_INT_U
+ VMULLTQ_INT_U
+ VMULQ_U
+ VMULQ_N_U
+ VORNQ_U
+ VORRQ_U
+ VQADDQ_U
+ VQADDQ_N_U
+ VQRSHLQ_U
+ VQRSHLQ_N_U
+ VQSHLQ_U
+ VQSHLQ_N_U
+ VQSHLQ_R_U
+ VQSUBQ_U
+ VQSUBQ_N_U
+ VRHADDQ_U
+ VRMULHQ_U
+ VRSHLQ_U
+ VRSHLQ_N_U
+ VRSHRQ_N_U
+ VSHLQ_N_U
+ VSHLQ_R_U
+ VSUBQ_U
+ VSUBQ_N_U
+ VCMPGEQ_N_S
+ VCMPGEQ_S
+ VCMPGTQ_N_S
+ VCMPGTQ_S
+ VCMPLEQ_N_S
+ VCMPLEQ_S
+ VCMPLTQ_N_S
+ VCMPLTQ_S
+ VHCADDQ_ROT270_S
+ VHCADDQ_ROT90_S
+ VMAXAQ_S
+ VMAXAVQ_S
+ VMINAQ_S
+ VMINAVQ_S
+ VMLADAVXQ_S
+ VMLSDAVQ_S
+ VMLSDAVXQ_S
+ VQDMULHQ_N_S
+ VQDMULHQ_S
+ VQRDMULHQ_N_S
+ VQRDMULHQ_S
+ VQSHLUQ_N_S
+ VCMPCSQ_N_U
+ VCMPCSQ_U
+ VCMPHIQ_N_U
+ VCMPHIQ_U
+ VABDQ_M_S
+ VABDQ_M_U
+ VABDQ_F
+ VADDQ_N_F
+ VANDQ_F
+ VBICQ_F
+ VCADDQ_ROT270_F
+ VCADDQ_ROT90_F
+ VCMPEQQ_F
+ VCMPEQQ_N_F
+ VCMPGEQ_F
+ VCMPGEQ_N_F
+ VCMPGTQ_F
+ VCMPGTQ_N_F
+ VCMPLEQ_F
+ VCMPLEQ_N_F
+ VCMPLTQ_F
+ VCMPLTQ_N_F
+ VCMPNEQ_F
+ VCMPNEQ_N_F
+ VCMULQ_F
+ VCMULQ_ROT180_F
+ VCMULQ_ROT270_F
+ VCMULQ_ROT90_F
+ VEORQ_F
+ VMAXNMAQ_F
+ VMAXNMAVQ_F
+ VMAXNMQ_F
+ VMAXNMVQ_F
+ VMINNMAQ_F
+ VMINNMAVQ_F
+ VMINNMQ_F
+ VMINNMVQ_F
+ VMULQ_F
+ VMULQ_N_F
+ VORNQ_F
+ VORRQ_F
+ VSUBQ_F
+ VADDLVAQ_U
+ VADDLVAQ_S
+ VBICQ_N_U
+ VBICQ_N_S
+ VCTP8Q_M
+ VCTP16Q_M
+ VCTP32Q_M
+ VCTP64Q_M
+ VCVTBQ_F16_F32
+ VCVTTQ_F16_F32
+ VMLALDAVQ_U
+ VMLALDAVXQ_U
+ VMLALDAVXQ_S
+ VMLALDAVQ_S
+ VMLSLDAVQ_S
+ VMLSLDAVXQ_S
+ VMOVNBQ_U
+ VMOVNBQ_S
+ VMOVNTQ_U
+ VMOVNTQ_S
+ VORRQ_N_S
+ VORRQ_N_U
+ VQDMULLBQ_N_S
+ VQDMULLBQ_S
+ VQDMULLTQ_N_S
+ VQDMULLTQ_S
+ VQMOVNBQ_U
+ VQMOVNBQ_S
+ VQMOVUNBQ_S
+ VQMOVUNTQ_S
+ VRMLALDAVHXQ_S
+ VRMLSLDAVHQ_S
+ VRMLSLDAVHXQ_S
+ VSHLLBQ_S
+ VSHLLBQ_U
+ VSHLLTQ_U
+ VSHLLTQ_S
+ VQMOVNTQ_U
+ VQMOVNTQ_S
+ VSHLLBQ_N_S
+ VSHLLBQ_N_U
+ VSHLLTQ_N_U
+ VSHLLTQ_N_S
+ VRMLALDAVHQ_U
+ VRMLALDAVHQ_S
+ VMULLTQ_POLY_P
+ VMULLBQ_POLY_P
+ VBICQ_M_N_S
+ VBICQ_M_N_U
+ VCMPEQQ_M_F
+ VCVTAQ_M_S
+ VCVTAQ_M_U
+ VCVTQ_M_TO_F_S
+ VCVTQ_M_TO_F_U
+ VQRSHRNBQ_N_U
+ VQRSHRNBQ_N_S
+ VQRSHRUNBQ_N_S
+ VRMLALDAVHAQ_S
+ VABAVQ_S
+ VABAVQ_U
+ VSHLCQ_S
+ VSHLCQ_U
+ VRMLALDAVHAQ_U
+ VABSQ_M_S
+ VADDVAQ_P_S
+ VADDVAQ_P_U
+ VCLSQ_M_S
+ VCLZQ_M_S
+ VCLZQ_M_U
+ VCMPCSQ_M_N_U
+ VCMPCSQ_M_U
+ VCMPEQQ_M_N_S
+ VCMPEQQ_M_N_U
+ VCMPEQQ_M_S
+ VCMPEQQ_M_U
+ VCMPGEQ_M_N_S
+ VCMPGEQ_M_S
+ VCMPGTQ_M_N_S
+ VCMPGTQ_M_S
+ VCMPHIQ_M_N_U
+ VCMPHIQ_M_U
+ VCMPLEQ_M_N_S
+ VCMPLEQ_M_S
+ VCMPLTQ_M_N_S
+ VCMPLTQ_M_S
+ VCMPNEQ_M_N_S
+ VCMPNEQ_M_N_U
+ VCMPNEQ_M_S
+ VCMPNEQ_M_U
+ VDUPQ_M_N_S
+ VDUPQ_M_N_U
+ VDWDUPQ_N_U
+ VDWDUPQ_WB_U
+ VIWDUPQ_N_U
+ VIWDUPQ_WB_U
+ VMAXAQ_M_S
+ VMAXAVQ_P_S
+ VMAXVQ_P_S
+ VMAXVQ_P_U
+ VMINAQ_M_S
+ VMINAVQ_P_S
+ VMINVQ_P_S
+ VMINVQ_P_U
+ VMLADAVAQ_S
+ VMLADAVAQ_U
+ VMLADAVQ_P_S
+ VMLADAVQ_P_U
+ VMLADAVXQ_P_S
+ VMLAQ_N_S
+ VMLAQ_N_U
+ VMLASQ_N_S
+ VMLASQ_N_U
+ VMLSDAVQ_P_S
+ VMLSDAVXQ_P_S
+ VMVNQ_M_S
+ VMVNQ_M_U
+ VNEGQ_M_S
+ VPSELQ_S
+ VPSELQ_U
+ VQABSQ_M_S
+ VQDMLAHQ_N_S
+ VQDMLASHQ_N_S
+ VQNEGQ_M_S
+ VQRDMLADHQ_S
+ VQRDMLADHXQ_S
+ VQRDMLAHQ_N_S
+ VQRDMLASHQ_N_S
+ VQRDMLSDHQ_S
+ VQRDMLSDHXQ_S
+ VQRSHLQ_M_N_S
+ VQRSHLQ_M_N_U
+ VQSHLQ_M_R_S
+ VQSHLQ_M_R_U
+ VREV64Q_M_S
+ VREV64Q_M_U
+ VRSHLQ_M_N_S
+ VRSHLQ_M_N_U
+ VSHLQ_M_R_S
+ VSHLQ_M_R_U
+ VSLIQ_N_S
+ VSLIQ_N_U
+ VSRIQ_N_S
+ VSRIQ_N_U
+ VQDMLSDHXQ_S
+ VQDMLSDHQ_S
+ VQDMLADHXQ_S
+ VQDMLADHQ_S
+ VMLSDAVAXQ_S
+ VMLSDAVAQ_S
+ VMLADAVAXQ_S
+ VCMPGEQ_M_F
+ VCMPGTQ_M_N_F
+ VMLSLDAVQ_P_S
+ VRMLALDAVHAXQ_S
+ VMLSLDAVXQ_P_S
+ VFMAQ_F
+ VMLSLDAVAQ_S
+ VQSHRUNBQ_N_S
+ VQRSHRUNTQ_N_S
+ VCMLAQ_F
+ VMINNMAQ_M_F
+ VFMASQ_N_F
+ VDUPQ_M_N_F
+ VCMPGTQ_M_F
+ VCMPLTQ_M_F
+ VRMLSLDAVHQ_P_S
+ VQSHRUNTQ_N_S
+ VABSQ_M_F
+ VMAXNMAVQ_P_F
+ VFMAQ_N_F
+ VRMLSLDAVHXQ_P_S
+ VREV32Q_M_F
+ VRMLSLDAVHAQ_S
+ VRMLSLDAVHAXQ_S
+ VCMPLTQ_M_N_F
+ VCMPNEQ_M_F
+ VRNDAQ_M_F
+ VRNDPQ_M_F
+ VADDLVAQ_P_S
+ VQMOVUNBQ_M_S
+ VCMPLEQ_M_F
+ VCMLAQ_ROT180_F
+ VMLSLDAVAXQ_S
+ VRNDXQ_M_F
+ VFMSQ_F
+ VMINNMVQ_P_F
+ VMAXNMVQ_P_F
+ VPSELQ_F
+ VCMLAQ_ROT90_F
+ VQMOVUNTQ_M_S
+ VREV64Q_M_F
+ VNEGQ_M_F
+ VRNDMQ_M_F
+ VCMPLEQ_M_N_F
+ VCMPGEQ_M_N_F
+ VRNDNQ_M_F
+ VMINNMAVQ_P_F
+ VCMPNEQ_M_N_F
+ VRMLALDAVHQ_P_S
+ VRMLALDAVHXQ_P_S
+ VCMPEQQ_M_N_F
+ VCMLAQ_ROT270_F
+ VMAXNMAQ_M_F
+ VRNDQ_M_F
+ VMLALDAVQ_P_U
+ VMLALDAVQ_P_S
+ VQMOVNBQ_M_S
+ VQMOVNBQ_M_U
+ VMOVLTQ_M_U
+ VMOVLTQ_M_S
+ VMOVNBQ_M_U
+ VMOVNBQ_M_S
+ VRSHRNTQ_N_U
+ VRSHRNTQ_N_S
+ VORRQ_M_N_S
+ VORRQ_M_N_U
+ VREV32Q_M_S
+ VREV32Q_M_U
+ VQRSHRNTQ_N_U
+ VQRSHRNTQ_N_S
+ VMOVNTQ_M_U
+ VMOVNTQ_M_S
+ VMOVLBQ_M_U
+ VMOVLBQ_M_S
+ VMLALDAVAQ_S
+ VMLALDAVAQ_U
+ VQSHRNBQ_N_U
+ VQSHRNBQ_N_S
+ VSHRNBQ_N_U
+ VSHRNBQ_N_S
+ VRSHRNBQ_N_S
+ VRSHRNBQ_N_U
+ VMLALDAVXQ_P_U
+ VMLALDAVXQ_P_S
+ VQMOVNTQ_M_U
+ VQMOVNTQ_M_S
+ VMVNQ_M_N_U
+ VMVNQ_M_N_S
+ VQSHRNTQ_N_U
+ VQSHRNTQ_N_S
+ VMLALDAVAXQ_S
+ VMLALDAVAXQ_U
+ VSHRNTQ_N_S
+ VSHRNTQ_N_U
+ VCVTBQ_M_F16_F32
+ VCVTBQ_M_F32_F16
+ VCVTTQ_M_F16_F32
+ VCVTTQ_M_F32_F16
+ VCVTMQ_M_S
+ VCVTMQ_M_U
+ VCVTNQ_M_S
+ VCVTPQ_M_S
+ VCVTPQ_M_U
+ VCVTQ_M_N_FROM_F_S
+ VCVTNQ_M_U
+ VREV16Q_M_S
+ VREV16Q_M_U
+ VREV32Q_M
+ VCVTQ_M_FROM_F_U
+ VCVTQ_M_FROM_F_S
+ VRMLALDAVHQ_P_U
+ VADDLVAQ_P_U
+ VCVTQ_M_N_FROM_F_U
+ VQSHLUQ_M_N_S
+ VABAVQ_P_S
+ VABAVQ_P_U
+ VSHLQ_M_S
+ VSHLQ_M_U
+ VSRIQ_M_N_S
+ VSRIQ_M_N_U
+ VSUBQ_M_U
+ VSUBQ_M_S
+ VCVTQ_M_N_TO_F_U
+ VCVTQ_M_N_TO_F_S
+ VQADDQ_M_U
+ VQADDQ_M_S
+ VRSHRQ_M_N_S
+ VSUBQ_M_N_S
+ VSUBQ_M_N_U
+ VBRSRQ_M_N_S
+ VSUBQ_M_N_F
+ VBICQ_M_F
+ VHADDQ_M_U
+ VBICQ_M_U
+ VBICQ_M_S
+ VMULQ_M_N_U
+ VHADDQ_M_S
+ VORNQ_M_F
+ VMLAQ_M_N_S
+ VQSUBQ_M_U
+ VQSUBQ_M_S
+ VMLAQ_M_N_U
+ VQSUBQ_M_N_U
+ VQSUBQ_M_N_S
+ VMULLTQ_INT_M_S
+ VMULLTQ_INT_M_U
+ VMULQ_M_N_S
+ VMULQ_M_N_F
+ VMLASQ_M_N_U
+ VMLASQ_M_N_S
+ VMAXQ_M_U
+ VQRDMLAHQ_M_N_U
+ VCADDQ_ROT270_M_F
+ VCADDQ_ROT270_M_U
+ VCADDQ_ROT270_M_S
+ VQRSHLQ_M_S
+ VMULQ_M_F
+ VRHADDQ_M_U
+ VSHRQ_M_N_U
+ VRHADDQ_M_S
+ VMULQ_M_S
+ VMULQ_M_U
+ VQDMLASHQ_M_N_S
+ VQRDMLASHQ_M_N_S
+ VRSHLQ_M_S
+ VRSHLQ_M_U
+ VRSHRQ_M_N_U
+ VADDQ_M_N_F
+ VADDQ_M_N_S
+ VADDQ_M_N_U
+ VQRDMLASHQ_M_N_U
+ VMAXQ_M_S
+ VQRDMLAHQ_M_N_S
+ VORRQ_M_S
+ VORRQ_M_U
+ VORRQ_M_F
+ VQRSHLQ_M_U
+ VRMULHQ_M_U
+ VRMULHQ_M_S
+ VMINQ_M_S
+ VMINQ_M_U
+ VANDQ_M_F
+ VANDQ_M_U
+ VANDQ_M_S
+ VHSUBQ_M_N_S
+ VHSUBQ_M_N_U
+ VMULHQ_M_S
+ VMULHQ_M_U
+ VMULLBQ_INT_M_U
+ VMULLBQ_INT_M_S
+ VCADDQ_ROT90_M_F
+ VSHRQ_M_N_S
+ VADDQ_M_U
+ VSLIQ_M_N_U
+ VQADDQ_M_N_S
+ VBRSRQ_M_N_F
+ VABDQ_M_F
+ VBRSRQ_M_N_U
+ VEORQ_M_F
+ VSHLQ_M_N_S
+ VQDMLAHQ_M_N_U
+ VQDMLAHQ_M_N_S
+ VSHLQ_M_N_U
+ VMLADAVAQ_P_U
+ VMLADAVAQ_P_S
+ VSLIQ_M_N_S
+ VQSHLQ_M_U
+ VQSHLQ_M_S
+ VCADDQ_ROT90_M_U
+ VCADDQ_ROT90_M_S
+ VORNQ_M_U
+ VORNQ_M_S
+ VQSHLQ_M_N_S
+ VQSHLQ_M_N_U
+ VADDQ_M_S
+ VHADDQ_M_N_S
+ VADDQ_M_F
+ VQADDQ_M_N_U
+ VEORQ_M_S
+ VEORQ_M_U
+ VHSUBQ_M_S
+ VHSUBQ_M_U
+ VHADDQ_M_N_U
+ VHCADDQ_ROT90_M_S
+ VQRDMLSDHQ_M_S
+ VQRDMLSDHXQ_M_S
+ VQRDMLADHXQ_M_S
+ VQDMULHQ_M_S
+ VMLADAVAXQ_P_S
+ VQDMLADHXQ_M_S
+ VQRDMULHQ_M_S
+ VMLSDAVAXQ_P_S
+ VQDMULHQ_M_N_S
+ VHCADDQ_ROT270_M_S
+ VQDMLSDHQ_M_S
+ VQDMLSDHXQ_M_S
+ VMLSDAVAQ_P_S
+ VQRDMLADHQ_M_S
+ VQDMLADHQ_M_S
+ VMLALDAVAQ_P_U
+ VMLALDAVAQ_P_S
+ VQRSHRNBQ_M_N_U
+ VQRSHRNBQ_M_N_S
+ VQRSHRNTQ_M_N_S
+ VQSHRNBQ_M_N_U
+ VQSHRNBQ_M_N_S
+ VQSHRNTQ_M_N_S
+ VRSHRNBQ_M_N_U
+ VRSHRNBQ_M_N_S
+ VRSHRNTQ_M_N_U
+ VSHLLBQ_M_N_U
+ VSHLLBQ_M_N_S
+ VSHLLTQ_M_N_U
+ VSHLLTQ_M_N_S
+ VSHRNBQ_M_N_S
+ VSHRNBQ_M_N_U
+ VSHRNTQ_M_N_S
+ VSHRNTQ_M_N_U
+ VMLALDAVAXQ_P_S
+ VQRSHRNTQ_M_N_U
+ VQSHRNTQ_M_N_U
+ VRSHRNTQ_M_N_S
+ VQRDMULHQ_M_N_S
+ VRMLALDAVHAQ_P_S
+ VMLSLDAVAQ_P_S
+ VMLSLDAVAXQ_P_S
+ VMULLBQ_POLY_M_P
+ VMULLTQ_POLY_M_P
+ VQDMULLBQ_M_N_S
+ VQDMULLBQ_M_S
+ VQDMULLTQ_M_N_S
+ VQDMULLTQ_M_S
+ VQRSHRUNBQ_M_N_S
+ VQSHRUNBQ_M_N_S
+ VQSHRUNTQ_M_N_S
+ VRMLALDAVHAQ_P_U
+ VRMLALDAVHAXQ_P_S
+ VRMLSLDAVHAQ_P_S
+ VRMLSLDAVHAXQ_P_S
+ VQRSHRUNTQ_M_N_S
+ VCMLAQ_M_F
+ VCMLAQ_ROT180_M_F
+ VCMLAQ_ROT270_M_F
+ VCMLAQ_ROT90_M_F
+ VCMULQ_M_F
+ VCMULQ_ROT180_M_F
+ VCMULQ_ROT270_M_F
+ VCMULQ_ROT90_M_F
+ VFMAQ_M_F
+ VFMAQ_M_N_F
+ VFMASQ_M_N_F
+ VFMSQ_M_F
+ VMAXNMQ_M_F
+ VMINNMQ_M_F
+ VSUBQ_M_F
+ VSTRWQSB_S
+ VSTRWQSB_U
+ VSTRBQSO_S
+ VSTRBQSO_U
+ VSTRBQ_S
+ VSTRBQ_U
+ VLDRBQGO_S
+ VLDRBQGO_U
+ VLDRBQ_S
+ VLDRBQ_U
+ VLDRWQGB_S
+ VLDRWQGB_U
+ VLD1Q_F
+ VLD1Q_S
+ VLD1Q_U
+ VLDRHQ_F
+ VLDRHQGO_S
+ VLDRHQGO_U
+ VLDRHQGSO_S
+ VLDRHQGSO_U
+ VLDRHQ_S
+ VLDRHQ_U
+ VLDRWQ_F
+ VLDRWQ_S
+ VLDRWQ_U
+ VLDRDQGB_S
+ VLDRDQGB_U
+ VLDRDQGO_S
+ VLDRDQGO_U
+ VLDRDQGSO_S
+ VLDRDQGSO_U
+ VLDRHQGO_F
+ VLDRHQGSO_F
+ VLDRWQGB_F
+ VLDRWQGO_F
+ VLDRWQGO_S
+ VLDRWQGO_U
+ VLDRWQGSO_F
+ VLDRWQGSO_S
+ VLDRWQGSO_U
+ VSTRHQ_F
+ VST1Q_S
+ VST1Q_U
+ VSTRHQSO_S
+ VSTRHQ_U
+ VSTRWQ_S
+ VSTRWQ_U
+ VSTRWQ_F
+ VST1Q_F
+ VSTRDQSB_S
+ VSTRDQSB_U
+ VSTRDQSO_S
+ VSTRDQSO_U
+ VSTRDQSSO_S
+ VSTRDQSSO_U
+ VSTRWQSO_S
+ VSTRWQSO_U
+ VSTRWQSSO_S
+ VSTRWQSSO_U
+ VSTRHQSO_F
+ VSTRHQSSO_F
+ VSTRWQSB_F
+ VSTRWQSO_F
+ VSTRWQSSO_F
+ VDDUPQ
+ VDDUPQ_M
+ VDWDUPQ
+ VDWDUPQ_M
+ VIDUPQ
+ VIDUPQ_M
+ VIWDUPQ
+ VIWDUPQ_M
+ VSTRWQSBWB_S
+ VSTRWQSBWB_U
+ VLDRWQGBWB_S
+ VLDRWQGBWB_U
+ VSTRWQSBWB_F
+ VLDRWQGBWB_F
+ VSTRDQSBWB_S
+ VSTRDQSBWB_U
+ VLDRDQGBWB_S
+ VLDRDQGBWB_U
+ VADCQ_U
+ VADCQ_M_U
+ VADCQ_S
+ VADCQ_M_S
+ VSBCIQ_U
+ VSBCIQ_S
+ VSBCIQ_M_U
+ VSBCIQ_M_S
+ VSBCQ_U
+ VSBCQ_S
+ VSBCQ_M_U
+ VSBCQ_M_S
+ VADCIQ_U
+ VADCIQ_M_U
+ VADCIQ_S
+ VADCIQ_M_S
+ VLD2Q
+ VLD4Q
+ VST2Q
+ VSHLCQ_M_U
+ VSHLCQ_M_S
+ VSTRHQSO_U
+ VSTRHQSSO_S
+ VSTRHQSSO_U
+ VSTRHQ_S
+ SRSHRL
+ SRSHR
+ URSHR
+ URSHRL
+ SQRSHR
+ UQRSHL
+ UQRSHLL_64
+ UQRSHLL_48
+ SQRSHRL_64
+ SQRSHRL_48
+ VSHLCQ_M_
+])
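
The enumerators above tag the RTL patterns behind the MVE ACLE intrinsics in arm_mve.h. A hedged user-level illustration (compile with something like -march=armv8.1-m.main+mve -mfloat-abi=hard):

#include <arm_mve.h>

int32x4_t
add_scalar (int32x4_t v, int32_t n)
{
  return vaddq_n_s32 (v, n);   /* lowers to a pattern tagged VADDQ_N_S */
}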
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index b7e3619..250e503 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -81,104 +81,53 @@
;; patterns separately for Neon, IWMMXT and MVE.
(define_expand "add<mode>3"
- [(set (match_operand:VNIM 0 "s_register_operand")
- (plus:VNIM (match_operand:VNIM 1 "s_register_operand")
- (match_operand:VNIM 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))
- || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(<MODE>mode))
- || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(<MODE>mode))"
-{
-})
-
-;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon and MVE.
-
-(define_expand "addv8hf3"
- [(set (match_operand:V8HF 0 "s_register_operand")
- (plus:V8HF (match_operand:V8HF 1 "s_register_operand")
- (match_operand:V8HF 2 "s_register_operand")))]
- "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode))
- || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)"
-{
- if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)
- emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2]));
-})
-
-;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon and IWMMXT.
-
-(define_expand "add<mode>3"
- [(set (match_operand:VNINOTM 0 "s_register_operand")
- (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand")
- (match_operand:VNINOTM 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
-
-;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for IWMMXT and Neon.
+ [(set (match_operand:VDQ 0 "s_register_operand")
+ (plus:VDQ (match_operand:VDQ 1 "s_register_operand")
+ (match_operand:VDQ 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "sub<mode>3"
- [(set (match_operand:VALL 0 "s_register_operand")
- (minus:VALL (match_operand:VALL 1 "s_register_operand")
- (match_operand:VALL 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ [(set (match_operand:VDQ 0 "s_register_operand")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand")
+ (match_operand:VDQ 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "mul<mode>3"
- [(set (match_operand:VALLW 0 "s_register_operand")
- (mult:VALLW (match_operand:VALLW 1 "s_register_operand")
- (match_operand:VALLW 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
-{
-})
+ [(set (match_operand:VDQWH 0 "s_register_operand")
+ (mult:VDQWH (match_operand:VDQWH 1 "s_register_operand")
+ (match_operand:VDQWH 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "smin<mode>3"
[(set (match_operand:VALLW 0 "s_register_operand")
(smin:VALLW (match_operand:VALLW 1 "s_register_operand")
(match_operand:VALLW 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "umin<mode>3"
[(set (match_operand:VINTW 0 "s_register_operand")
(umin:VINTW (match_operand:VINTW 1 "s_register_operand")
(match_operand:VINTW 2 "s_register_operand")))]
- "TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "smax<mode>3"
[(set (match_operand:VALLW 0 "s_register_operand")
(smax:VALLW (match_operand:VALLW 1 "s_register_operand")
(match_operand:VALLW 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "umax<mode>3"
[(set (match_operand:VINTW 0 "s_register_operand")
(umax:VINTW (match_operand:VINTW 1 "s_register_operand")
(match_operand:VINTW 2 "s_register_operand")))]
- "TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand")
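
A hedged illustration of what the unified expanders buy: the loop below can now vectorize through a single add<mode>3 pattern whose availability is decided by ARM_HAVE_<MODE>_ARITH; as the replaced conditions suggest, Neon float modes still require -funsafe-math-optimizations while MVE float does not. The function is made up for illustration:

void
vadd (float *restrict a, const float *restrict b, int n)
{
  for (int i = 0; i < n; i++)   /* maps onto addv4sf3 when enabled */
    a[i] += b[i];
}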
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6a2bc5a..e6c287c 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -387,31 +387,15 @@
(set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")]
)
-(define_insn "*mov_load_vfp_hf16"
- [(set (match_operand:HF 0 "s_register_operand" "=t")
- (match_operand:HF 1 "memory_operand" "Uj"))]
- "TARGET_HAVE_MVE_FLOAT"
- "vldr.16\\t%0, %E1"
-)
-
-(define_insn "*mov_store_vfp_hf16"
- [(set (match_operand:HF 0 "memory_operand" "=Uj")
- (match_operand:HF 1 "s_register_operand" "t"))]
- "TARGET_HAVE_MVE_FLOAT"
- "vstr.16\\t%1, %E0"
-)
-
;; HFmode and BFmode moves
(define_insn "*mov<mode>_vfp_<mode>16"
[(set (match_operand:HFBF 0 "nonimmediate_operand"
- "= ?r,?m,t,r,t,r,t, t, Um,r")
+ "= ?r,?m,t,r,t,r,t, t, Uj,r")
(match_operand:HFBF 1 "general_operand"
- " m,r,t,r,r,t,Dv,Um,t, F"))]
+ " m,r,t,r,r,t,Dv,Uj,t, F"))]
"TARGET_32BIT
- && TARGET_VFP_FP16INST
- && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0],
- operands[1])
+ && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
&& (s_register_operand (operands[0], <MODE>mode)
|| s_register_operand (operands[1], <MODE>mode))"
{
@@ -430,9 +414,15 @@
case 6: /* S register from immediate. */
return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
case 7: /* S register from memory. */
- return \"vld1.16\\t{%z0}, %A1\";
+ if (TARGET_HAVE_MVE)
+ return \"vldr.16\\t%0, %1\";
+ else
+ return \"vld1.16\\t{%z0}, %A1\";
case 8: /* Memory from S register. */
- return \"vst1.16\\t{%z1}, %A0\";
+ if (TARGET_HAVE_MVE)
+ return \"vstr.16\\t%1, %0\";
+ else
+ return \"vst1.16\\t{%z1}, %A0\";
case 9: /* ARM register from constant. */
{
long bits;
@@ -2135,7 +2125,7 @@
(match_operand:DF 1 "const_double_operand" "F"))
(clobber (match_operand:DF 2 "s_register_operand" "=r"))]
"arm_disable_literal_pool
- && TARGET_HARD_FLOAT
+ && TARGET_VFP_BASE
&& !arm_const_double_rtx (operands[1])
&& !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))"
"#"
@@ -2161,7 +2151,7 @@
(match_operand:SF 1 "const_double_operand" "E"))
(clobber (match_operand:SF 2 "s_register_operand" "=r"))]
"arm_disable_literal_pool
- && TARGET_HARD_FLOAT
+ && TARGET_VFP_BASE
&& !vfp3_const_double_rtx (operands[1])"
"#"
""
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index 2ebfce8..487ec0f 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -44,7 +44,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
\
if (arm_arch_xscale) \
builtin_define ("_VX_CPU=XSCALE"); \
- if (arm_arch8) \
+ else if (arm_arch8) \
builtin_define ("_VX_CPU=ARMARCH8A"); \
else if (arm_arch7) \
{ \
diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md
index 769d8ea..8e7cf50 100644
--- a/gcc/config/bpf/bpf.md
+++ b/gcc/config/bpf/bpf.md
@@ -165,6 +165,16 @@
"div<msuffix>\t%0,%2"
[(set_attr "type" "<mtype>")])
+;; However, xBPF does provide a signed division operator, sdiv.
+
+(define_insn "div<AM:mode>3"
+ [(set (match_operand:AM 0 "register_operand" "=r,r")
+ (div:AM (match_operand:AM 1 "register_operand" " 0,0")
+ (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
+ "TARGET_XBPF"
+ "sdiv<msuffix>\t%0,%2"
+ [(set_attr "type" "<mtype>")])
+
;;; Modulus
;; Note that eBPF doesn't provide instructions for signed integer
@@ -178,6 +188,16 @@
"mod<msuffix>\t%0,%2"
[(set_attr "type" "<mtype>")])
+;; Again, xBPF provides a signed version, smod.
+
+(define_insn "mod<AM:mode>3"
+ [(set (match_operand:AM 0 "register_operand" "=r,r")
+ (mod:AM (match_operand:AM 1 "register_operand" " 0,0")
+ (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
+ "TARGET_XBPF"
+ "smod<msuffix>\t%0,%2"
+ [(set_attr "type" "<mtype>")])
+
;;; Logical AND
(define_insn "and<AM:mode>3"
[(set (match_operand:AM 0 "register_operand" "=r,r")
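
A hedged illustration for the two xBPF-only patterns added above: with -mxbpf, signed 64-bit division and modulus compile to the new sdiv/smod instructions, while plain eBPF still provides only the unsigned forms:

long
signed_div (long a, long b)
{
  return a / b;   /* sdiv under TARGET_XBPF */
}

long
signed_mod (long a, long b)
{
  return a % b;   /* smod under TARGET_XBPF */
}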
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index 54cd1e4..49c540f 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -125,6 +125,6 @@ extern bool darwin_kextabi_p (void);
extern void darwin_override_options (void);
extern void darwin_patch_builtins (void);
extern void darwin_rename_builtins (void);
-extern bool darwin_libc_has_function (enum function_class fn_class);
+extern bool darwin_libc_has_function (enum function_class fn_class, tree);
#endif /* CONFIG_DARWIN_PROTOS_H */
diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def
index 98677f6..65bf5ad 100644
--- a/gcc/config/darwin-sections.def
+++ b/gcc/config/darwin-sections.def
@@ -198,3 +198,18 @@ DEF_SECTION (objc2_image_info_section, 0,
".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1)
DEF_SECTION (objc2_constant_string_object_section, 0,
".section __DATA, __objc_stringobj, regular, no_dead_strip", 1)
+
+/* Additions for compatibility with later runtime conventions, especially for
+ sections containing strings. */
+DEF_SECTION (objc2_data_section, 0, ".section __DATA, __data", 1)
+
+DEF_SECTION (objc2_ivar_section, 0, ".section __DATA, __objc_ivar", 1)
+
+DEF_SECTION (objc2_class_names_section, 0,
+ ".section __TEXT, __objc_classname, cstring_literals", 1)
+
+DEF_SECTION (objc2_method_names_section, 0,
+ ".section __TEXT, __objc_methname, cstring_literals", 1)
+
+DEF_SECTION (objc2_method_types_section, 0,
+ ".section __TEXT, __objc_methtype, cstring_literals", 1)
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index c8edfb8..dd4857f 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -136,7 +136,7 @@ output_objc_section_asm_op (const void *directive)
order in the object. The code below implements this by emitting
a section header for each ObjC section the first time that an ObjC
section is requested. */
- if (! been_here)
+ if (darwin_symbol_stubs && ! been_here)
{
section *saved_in_section = in_section;
static const enum darwin_section_enum tomark[] =
@@ -174,20 +174,23 @@ output_objc_section_asm_op (const void *directive)
/* ABI=2 */
static const enum darwin_section_enum tomarkv2[] =
{
+ objc2_method_names_section,
objc2_message_refs_section,
+ objc2_selector_refs_section,
+ objc2_ivar_section,
objc2_classdefs_section,
objc2_metadata_section,
objc2_classrefs_section,
+ objc2_class_names_section,
objc2_classlist_section,
objc2_categorylist_section,
- objc2_selector_refs_section,
objc2_nonlazy_class_section,
objc2_nonlazy_category_section,
objc2_protocollist_section,
objc2_protocolrefs_section,
objc2_super_classrefs_section,
+ objc2_constant_string_object_section,
objc2_image_info_section,
- objc2_constant_string_object_section
} ;
size_t i;
@@ -1436,7 +1439,7 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
p = IDENTIFIER_POINTER (ident);
- gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2);
+ gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi == 2);
objc_metadata_seen = 1;
@@ -1447,11 +1450,20 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
first. */
if (!strncmp (p, "V2_BASE", 7))
return base;
+ else if (!strncmp (p, "V2_CNAM", 7))
+ return darwin_sections[objc2_class_names_section];
+ else if (!strncmp (p, "V2_MNAM", 7))
+ return darwin_sections[objc2_method_names_section];
+ else if (!strncmp (p, "V2_MTYP", 7))
+ return darwin_sections[objc2_method_types_section];
else if (!strncmp (p, "V2_STRG", 7))
return darwin_sections[cstring_section];
else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7))
return darwin_sections[objc2_classdefs_section];
+ else if (!strncmp (p, "V2_PCOL", 7))
+ return ld_uses_coal_sects ? darwin_sections[data_coal_section]
+ : darwin_sections[objc2_data_section];
else if (!strncmp (p, "V2_MREF", 7))
return darwin_sections[objc2_message_refs_section];
else if (!strncmp (p, "V2_CLRF", 7))
@@ -1487,6 +1499,9 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
else if (!strncmp (p, "V2_CSTR", 7))
return darwin_sections[objc2_constant_string_object_section];
+ else if (!strncmp (p, "V2_IVRF", 7))
+ return darwin_sections[objc2_ivar_section];
+
/* Not recognized, default. */
return base;
}
@@ -1500,7 +1515,7 @@ darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
p = IDENTIFIER_POINTER (ident);
- gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2);
+ gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi < 2);
objc_metadata_seen = 1;
@@ -1861,6 +1876,14 @@ darwin_globalize_label (FILE *stream, const char *name)
{
if (!!strncmp (name, "_OBJC_", 6))
default_globalize_label (stream, name);
+ /* We have some Objective C cases that need to be global, but only on newer
+ OS versions. */
+ if (flag_objc_abi < 2 || flag_next_runtime < 100700)
+ return;
+ if (!strncmp (name+6, "LabelPro", 8))
+ default_globalize_label (stream, name);
+ if (!strncmp (name+6, "Protocol_", 9))
+ default_globalize_label (stream, name);
}
/* This routine returns non-zero if 'name' starts with the special objective-c
@@ -1879,7 +1902,49 @@ darwin_label_is_anonymous_local_objc_name (const char *name)
while (*p >= '0' && *p <= '9')
p++;
}
- return (!strncmp ((const char *)p, "_OBJC_", 6));
+ if (strncmp ((const char *)p, "_OBJC_", 6) != 0)
+ return false;
+
+ /* We need some of the objective c meta-data symbols to be visible to the
+ linker (when the target OS version is newer). FIXME: this is horrible,
+ we need a better mechanism. */
+
+ if (flag_objc_abi < 2 || flag_next_runtime < 100700)
+ return true;
+
+ p += 6;
+ if (!strncmp ((const char *)p, "ClassRef", 8))
+ return false;
+ else if (!strncmp ((const char *)p, "SelRef", 6))
+ return false;
+ else if (!strncmp ((const char *)p, "Category", 8))
+ {
+ if (p[8] == '_' || p[8] == 'I' || p[8] == 'P' || p[8] == 'C' )
+ return false;
+ return true;
+ }
+ else if (!strncmp ((const char *)p, "ClassMethods", 12))
+ return false;
+ else if (!strncmp ((const char *)p, "Instance", 8))
+ {
+ if (p[8] == 'I' || p[8] == 'M')
+ return false;
+ return true;
+ }
+ else if (!strncmp ((const char *)p, "CLASS_RO", 8))
+ return false;
+ else if (!strncmp ((const char *)p, "METACLASS_RO", 12))
+ return false;
+ else if (!strncmp ((const char *)p, "Protocol", 8))
+ {
+ if (p[8] == '_' || p[8] == 'I' || p[8] == 'P'
+ || p[8] == 'M' || p[8] == 'C' || p[8] == 'O')
+ return false;
+ return true;
+ }
+ else if (!strncmp ((const char *)p, "LabelPro", 8))
+ return false;
+ return true;
}
/* LTO support for Mach-O.
@@ -2384,11 +2449,7 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name,
unsigned HOST_WIDE_INT size,
unsigned int l2align)
{
- /* FIXME: We have a fudge to make this work with Java even when the target does
- not use sections anchors -- Java seems to need at least one small item in a
- non-zerofill segment. */
- if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
- || (size && size <= 2))
+ if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
{
/* Put smaller objects in _static_data, where the section anchors system
can get them.
@@ -2414,16 +2475,13 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name,
}
else
{
- /* When we are on a non-section anchor target, we can get zero-sized
- items here. However, all we need to do is to bump them to one byte
- and the section alignment will take care of the rest. */
+ /* When we are on a non-section anchor target (or not using section
+ anchors), we can get zero-sized items here. However, all we need to
+ do is to bump them to one byte and the section alignment will take
+ care of the rest. */
char secnam[64];
- unsigned int flags ;
- snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"),
- (unsigned) l2align);
- /* We can't anchor (yet, if ever) in zerofill sections, because we can't
- switch to them and emit a label. */
- flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
+ snprintf (secnam, 64, "__DATA,__bss");
+ unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
in_section = get_section (secnam, flags, NULL);
fprintf (fp, "\t.zerofill %s,", secnam);
assemble_name (fp, name);
@@ -2434,7 +2492,7 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name,
fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
size, (unsigned) l2align);
else
- fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size);
}
(*targetm.encode_section_info) (decl, DECL_RTL (decl), false);
@@ -2559,9 +2617,8 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d"
return;
}
- /* So we have a public symbol (small item fudge for Java, see above). */
- if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
- || (size && size <= 2))
+ /* So we have a public symbol. */
+ if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
{
/* Put smaller objects in data, where the section anchors system can get
them. However, if they are zero-sized punt them to yet a different
@@ -2586,16 +2643,10 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d"
}
else
{
+ /* Section anchors not in use. */
+ unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
char secnam[64];
- unsigned int flags ;
- /* When we are on a non-section anchor target, we can get zero-sized
- items here. However, all we need to do is to bump them to one byte
- and the section alignment will take care of the rest. */
- snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align);
-
- /* We can't anchor in zerofill sections, because we can't switch
- to them and emit a label. */
- flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
+ snprintf (secnam, 64, "__DATA,__common");
in_section = get_section (secnam, flags, NULL);
fprintf (fp, "\t.zerofill %s,", secnam);
assemble_name (fp, name);
@@ -2605,7 +2656,7 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d"
if (l2align)
fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align);
else
- fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size);
}
(* targetm.encode_section_info) (decl, DECL_RTL (decl), false);
}
@@ -3141,10 +3192,14 @@ darwin_override_options (void)
/* Keep track of which (major) version we're generating code for. */
if (darwin_macosx_version_min)
{
- if (strverscmp (darwin_macosx_version_min, "10.6") >= 0)
+ if (strverscmp (darwin_macosx_version_min, "10.7") >= 0)
+ generating_for_darwin_version = 11;
+ else if (strverscmp (darwin_macosx_version_min, "10.6") >= 0)
generating_for_darwin_version = 10;
else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0)
generating_for_darwin_version = 9;
+ else if (strverscmp (darwin_macosx_version_min, "10.4") >= 0)
+ generating_for_darwin_version = 8;
/* Earlier versions are not specifically accounted, until required. */
}
@@ -3160,6 +3215,20 @@ darwin_override_options (void)
should check for correctness re. the ABI. TODO: check and provide the
flags (runtime & ABI) from the lto wrapper). */
+ /* At present, make a hard update to the runtime version based on the target
+ OS version. */
+ if (flag_next_runtime)
+ {
+ if (generating_for_darwin_version > 10)
+ flag_next_runtime = 100705;
+ else if (generating_for_darwin_version > 9)
+ flag_next_runtime = 100608;
+ else if (generating_for_darwin_version > 8)
+ flag_next_runtime = 100508;
+ else
+ flag_next_runtime = 100000;
+ }
+
/* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */
if (!global_options_set.x_flag_objc_abi)
global_options.x_flag_objc_abi
@@ -3542,7 +3611,8 @@ darwin_rename_builtins (void)
}
bool
-darwin_libc_has_function (enum function_class fn_class)
+darwin_libc_has_function (enum function_class fn_class,
+ tree type ATTRIBUTE_UNUSED)
{
if (fn_class == function_sincos)
return (strverscmp (darwin_macosx_version_min, "10.9") >= 0);
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 55a5361..f9d4fec 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -107,7 +107,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* Default to using the NeXT-style runtime, since that's what is
pre-installed on Darwin systems. */
-#define NEXT_OBJC_RUNTIME 1
+#define NEXT_OBJC_RUNTIME 100508
/* Don't default to pcc-struct-return, because gcc is the only compiler, and
we want to retain compatibility with older gcc versions. */
@@ -476,6 +476,7 @@ extern GTY(()) int darwin_ms_struct;
debugging data. */
#define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{!gdwarf*:--gstabs}}}"
+#define ASM_DEBUG_OPTION_SPEC ""
#define ASM_FINAL_SPEC \
"%{gsplit-dwarf:%ngsplit-dwarf is not supported on this platform} %<gsplit-dwarf"
diff --git a/gcc/config/darwin9.h b/gcc/config/darwin9.h
index b7bdf63..787aca7 100644
--- a/gcc/config/darwin9.h
+++ b/gcc/config/darwin9.h
@@ -41,6 +41,9 @@ along with GCC; see the file COPYING3. If not see
#undef ASM_DEBUG_SPEC
#define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{gstabs:--gstabs}}}"
+#undef ASM_DEBUG_OPTION_SPEC
+#define ASM_DEBUG_OPTION_SPEC ""
+
#undef ASM_OUTPUT_ALIGNED_COMMON
#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
do { \
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 0e73fea..763e770 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -67,6 +67,7 @@
UNSPECV_ICACHE_INV])
(define_c_enum "unspec" [
+ UNSPEC_ADDPTR
UNSPEC_VECTOR
UNSPEC_BPERMUTE
UNSPEC_SGPRBASE
@@ -1219,29 +1220,47 @@
; "addptr" is the same as "add" except that it must not write to VCC or SCC
; as a side-effect. Unfortunately GCN does not have a suitable instruction
-; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
-; Note that it is not safe to save/clobber/restore SCC because doing so will
-; break data-flow analysis, so this must use vector registers.
+; for this, so we use CC_SAVE_REG as a temp.
+; Note that it is not safe to save/clobber/restore as separate insns because
+; doing so will break data-flow analysis, so this must use multiple
+; instructions in one insn.
;
; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
+;
+; The SGPR alternative is preferred as it is typically used with mov_sgprbase.
(define_insn "addptrdi3"
- [(set (match_operand:DI 0 "register_operand" "= v")
- (plus:DI (match_operand:DI 1 "register_operand" " v0")
- (match_operand:DI 2 "nonmemory_operand" "vDA")))]
+ [(set (match_operand:DI 0 "register_operand" "= v, Sg")
+ (unspec:DI [
+ (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0")
+ (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))]
+ UNSPEC_ADDPTR))]
""
{
- rtx new_operands[4] = { operands[0], operands[1], operands[2],
- gen_rtx_REG (DImode, CC_SAVE_REG) };
+ if (which_alternative == 0)
+ {
+ rtx new_operands[4] = { operands[0], operands[1], operands[2],
+ gen_rtx_REG (DImode, CC_SAVE_REG) };
- output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands);
- output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands);
+ output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
+ output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
+ }
+ else
+ {
+ rtx new_operands[4] = { operands[0], operands[1], operands[2],
+ gen_rtx_REG (BImode, CC_SAVE_REG) };
+
+ output_asm_insn ("s_mov_b32\t%3, scc", new_operands);
+ output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands);
+ output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands);
+ output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands);
+ }
return "";
}
- [(set_attr "type" "vmult")
- (set_attr "length" "16")])
+ [(set_attr "type" "vmult,mult")
+ (set_attr "length" "16,24")])
;; }}}
;; {{{ ALU special cases: Minus
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 0983b98..f7589a5 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -737,7 +737,8 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
obstack_ptr_grow (&argv_obstack, NULL);
const char **new_argv = XOBFINISH (&argv_obstack, const char **);
- fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+ fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
+ ".gccnative_args");
obstack_free (&argv_obstack, NULL);
}
@@ -1001,7 +1002,7 @@ main (int argc, char **argv)
unsetenv ("LIBRARY_PATH");
/* Run the compiler pass. */
- fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true);
+ fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args");
obstack_free (&cc_argv_obstack, NULL);
in = fopen (gcn_s1_name, "r");
@@ -1022,7 +1023,7 @@ main (int argc, char **argv)
fclose (out);
/* Run the assemble/link pass. */
- fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true);
+ fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
obstack_free (&ld_argv_obstack, NULL);
in = fopen (gcn_o_name, "r");
diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h
index 6c15417..6dffe45 100644
--- a/gcc/config/i386/adxintrin.h
+++ b/gcc/config/i386/adxintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _ADXINTRIN_H_INCLUDED
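
Illustrative only: user code keeps including an umbrella header; after this change the ADX intrinsics arrive via <x86gprintrin.h> (also pulled in by <immintrin.h>). Compile with e.g. -madx:

#include <x86gprintrin.h>

unsigned char
add_with_carry (unsigned int a, unsigned int b, unsigned int *out)
{
  return _addcarryx_u32 (0, a, b, out);   /* ADX adcx/adox form */
}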
diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h
new file mode 100644
index 0000000..77cc395
--- /dev/null
+++ b/gcc/config/i386/amxbf16intrin.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXBF16INTRIN_H_INCLUDED
+#define _AMXBF16INTRIN_H_INCLUDED
+
+#if !defined(__AMX_BF16__)
+#pragma GCC push_options
+#pragma GCC target("amx-bf16")
+#define __DISABLE_AMX_BF16__
+#endif /* __AMX_BF16__ */
+
+#if defined(__x86_64__) && defined(__AMX_BF16__)
+#define _tile_dpbf16ps_internal(dst,src1,src2) \
+ __asm__ volatile\
+ ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbf16ps(dst,src1,src2) \
+ _tile_dpbf16ps_internal (dst, src1, src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_BF16__
+#undef __DISABLE_AMX_BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_BF16__ */
+
+#endif /* _AMXBF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h
new file mode 100644
index 0000000..f4e410b
--- /dev/null
+++ b/gcc/config/i386/amxint8intrin.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXINT8INTRIN_H_INCLUDED
+#define _AMXINT8INTRIN_H_INCLUDED
+
+#if !defined(__AMX_INT8__)
+#pragma GCC push_options
+#pragma GCC target("amx-int8")
+#define __DISABLE_AMX_INT8__
+#endif /* __AMX_INT8__ */
+
+#if defined(__x86_64__) && defined(__AMX_INT8__)
+#define _tile_int8_dp_internal(name,dst,src1,src2) \
+ __asm__ volatile \
+ ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbssd(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
+
+#define _tile_dpbsud(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
+
+#define _tile_dpbusd(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
+
+#define _tile_dpbuud(dst,src1,src2) \
+ _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_INT8__
+#undef __DISABLE_AMX_INT8__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_INT8__ */
+
+#endif /* _AMXINT8INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
new file mode 100644
index 0000000..41fb9a5
--- /dev/null
+++ b/gcc/config/i386/amxtileintrin.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXTILEINTRIN_H_INCLUDED
+#define _AMXTILEINTRIN_H_INCLUDED
+
+#if !defined(__AMX_TILE__)
+#pragma GCC push_options
+#pragma GCC target("amx-tile")
+#define __DISABLE_AMX_TILE__
+#endif /* __AMX_TILE__ */
+
+#if defined(__x86_64__) && defined(__AMX_TILE__)
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_loadconfig (const void *__config)
+{
+ __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_storeconfig (void *__config)
+{
+ __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tile_release (void)
+{
+ __asm__ volatile ("tilerelease" ::);
+}
+
+#define _tile_loadd(dst,base,stride) \
+ _tile_loadd_internal (dst, base, stride)
+
+#define _tile_loadd_internal(dst,base,stride) \
+ __asm__ volatile \
+ ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*) base), "r" ((long) stride))
+
+#define _tile_stream_loadd(dst,base,stride) \
+ _tile_stream_loadd_internal (dst, base, stride)
+
+#define _tile_stream_loadd_internal(dst,base,stride) \
+ __asm__ volatile \
+ ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*) base), "r" ((long) stride))
+
+#define _tile_stored(dst,base,stride) \
+ _tile_stored_internal (dst, base, stride)
+
+#define _tile_stored_internal(src,base,stride) \
+ __asm__ volatile \
+ ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \
+ :: "r" ((void*) base), "r" ((long) stride) \
+ : "memory")
+
+#define _tile_zero(dst) \
+ _tile_zero_internal (dst)
+
+#define _tile_zero_internal(dst) \
+ __asm__ volatile \
+ ("tilezero\t%%tmm"#dst ::)
+
+#endif
+
+#ifdef __DISABLE_AMX_TILE__
+#undef __DISABLE_AMX_TILE__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_TILE__ */
+
+#endif /* _AMXTILEINTRIN_H_INCLUDED */
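
A hedged usage sketch of the new AMX intrinsics (x86-64 only; compile with something like -mamx-tile -mamx-int8). The tile configuration buffer is only schematic; real code must fill in a valid palette and tile descriptors before ldtilecfg:

#include <immintrin.h>

static char cfg[64] __attribute__ ((aligned (64)));  /* assumed pre-filled */

void
tile_dot (const void *a, const void *b, void *c, long stride)
{
  _tile_loadconfig (cfg);
  _tile_loadd (1, a, stride);      /* load A into tmm1 */
  _tile_loadd (2, b, stride);      /* load B into tmm2 */
  _tile_zero (0);                  /* clear accumulator tmm0 */
  _tile_dpbssd (0, 1, 2);          /* int8 dot-product, amxint8intrin.h */
  _tile_stored (0, c, stride);     /* write back the result tile */
  _tile_release ();
}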
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 6bf1f8c..e29c532 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -950,6 +950,9 @@ _mm256_broadcastsi128_si256 (__m128i __X)
return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
}
+#define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X)
+#define _mm_broadcastsd_pd(X) _mm_movedup_pd(X)
+
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index d19c104..3da05e1 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -36,7 +36,11 @@
/* Internal data types for implementing the intrinsics. */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
typedef unsigned long long __mmask64;
@@ -303,6 +307,13 @@ _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi16 (void const *__P)
+{
+ return (__m512i) (*(__v32hi_u *) __P);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P,
@@ -322,6 +333,13 @@ _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi16 (void *__P, __m512i __A)
+{
+ *(__v32hi_u *) __P = (__v32hi_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
{
__builtin_ia32_storedquhi512_mask ((short *) __P,
@@ -382,6 +400,13 @@ _kunpackd_mask64 (__mmask32 __A, __mmask32 __B)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_epi8 (void const *__P)
+{
+ return (__m512i) (*(__v64qi_u *) __P);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P,
@@ -401,6 +426,13 @@ _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_epi8 (void *__P, __m512i __A)
+{
+ *(__v64qi_u *) __P = (__v64qi_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
{
__builtin_ia32_storedquqi512_mask ((char *) __P,
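
A hedged example of the new unaligned whole-vector forms added above (compile with -mavx512bw); no alignment is assumed for either pointer:

#include <immintrin.h>

void
copy64 (void *dst, const void *src)
{
  __m512i v = _mm512_loadu_epi8 (src);
  _mm512_storeu_epi8 (dst, v);
}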
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index d28dfab..fd61b70 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -1168,6 +1168,17 @@ _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, int __C)
{
@@ -1179,6 +1190,17 @@ _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df) __W,
+ __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
@@ -1187,6 +1209,18 @@ _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
(__mmask8) __U);
}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ __U, __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
@@ -1197,6 +1231,16 @@ _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
(__mmask8) -1);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1211,6 +1255,17 @@ _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A,
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf) __W,
+ __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
@@ -1219,6 +1274,18 @@ _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
(__mmask8) __U);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __U, __R);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_sd (__m128d __A, __m128d __B, int __C)
@@ -1808,6 +1875,17 @@ _mm512_reduce_pd (__m512d __A, int __B)
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_round_pd (__m512d __A, int __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
+ __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
{
return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
@@ -1817,6 +1895,17 @@ _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ int __B, const int __R)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
+ __B,
+ (__v8df) __W,
+ __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
{
return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
@@ -1825,6 +1914,18 @@ _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
(__mmask8) __U);
}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
+ __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __U, __R);
+}
+
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ps (__m512 __A, int __B)
@@ -1837,6 +1938,17 @@ _mm512_reduce_ps (__m512 __A, int __B)
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_round_ps (__m512 __A, int __B, const int __R)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
+ __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
{
return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
@@ -1846,6 +1958,17 @@ _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
+ __B,
+ (__v16sf) __W,
+ __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
{
return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
@@ -1854,6 +1977,18 @@ _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
(__mmask16) __U);
}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
+ __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __U, __R);
+}
+
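[Annotation, not part of the patch: a minimal usage sketch of the new embedded-rounding reduce intrinsics. VREDUCE only admits suppress-all-exceptions for the rounding argument; the 8-fraction-bit immediate is purely illustrative. Assumes an AVX512DQ target.]

    #include <immintrin.h>

    /* Keep 8 fraction bits, truncate toward zero, suppress FP exceptions.
       imm8 layout: bits 7:4 = fraction-bit count, bits 1:0 = rounding.  */
    __m512d
    fractional_part (__m512d x)
    {
      return _mm512_reduce_round_pd (x, (8 << 4) | _MM_FROUND_TO_ZERO,
                                     _MM_FROUND_NO_EXC);
    }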
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf32x8_ps (__m512 __A, const int __imm)
@@ -2440,26 +2575,50 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
(int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))
+#define _mm512_reduce_round_pd(A, B, R) \
+ ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
+ (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
+
#define _mm512_mask_reduce_pd(W, U, A, B) \
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
(int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
+#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
+ ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
+ (int)(B), (__v8df)(__m512d)(W), (U), (R)))
+
#define _mm512_maskz_reduce_pd(U, A, B) \
((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
(int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))
+#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
+ ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\
+ (int)(B), (__v8df)_mm512_setzero_pd (), (U), (R)))
+
#define _mm512_reduce_ps(A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
(int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))
+#define _mm512_reduce_round_ps(A, B, R) \
+ ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
+ (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
+
#define _mm512_mask_reduce_ps(W, U, A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
(int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
+#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
+ ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
+ (int)(B), (__v16sf)(__m512)(W), (U), (R)))
+
#define _mm512_maskz_reduce_ps(U, A, B) \
((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
(int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))
+#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
+ ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\
+ (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
+
#define _mm512_extractf32x8_ps(X, C) \
((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
(int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))
@@ -2679,6 +2838,20 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
(__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
(__mmask8)(U)))
+#define _mm_reduce_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \
+ (__mmask8)(U), (int)(R)))
+
#define _mm_reduce_ss(A, B, C) \
((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
@@ -2693,6 +2866,19 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
(__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
(__mmask8)(U)))
+#define _mm_reduce_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
+ ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
+ ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \
+ (__mmask8)(U), (int)(R)))
#endif
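[Annotation: a minimal caller for the scalar reduce forms (sketch; assumes -mavx512dq, and -O0 so the macro definitions above rather than the inline functions are what expand).]

    #include <immintrin.h>

    __m128d
    reduce_low (__m128d w, __mmask8 m, __m128d a, __m128d b)
    {
      /* Low lane: reduced argument of b where m[0] is set, else w[0];
         high lane copied from a.  Immediate 16 = one fraction bit,
         round to nearest.  */
      return _mm_mask_reduce_round_sd (w, m, a, b, 16, _MM_FROUND_NO_EXC);
    }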
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index b9804c9..6ec8ee2 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -168,6 +168,30 @@ _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
__R);
}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, int __R)
+{
+ return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
+{
+ return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ __U,
+ __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
@@ -177,6 +201,30 @@ _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
__R);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, int __R)
+{
+ return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
+{
+ return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __U,
+ __R);
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
@@ -242,6 +290,30 @@ _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
__R);
}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, int __R)
+{
+ return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
+{
+ return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ __U,
+ __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
@@ -251,6 +323,30 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
__R);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, int __R)
+{
+ return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
+{
+ return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __U,
+ __R);
+}
+
#else
#define _mm512_exp2a23_round_pd(A, C) \
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
@@ -309,17 +405,69 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
#define _mm_rcp28_round_sd(A, B, R) \
__builtin_ia32_rcp28sd_round(A, B, R)
+#define _mm_mask_rcp28_round_sd(W, U, A, B, R) \
+ __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
+
+#define _mm_maskz_rcp28_round_sd(U, A, B, R) \
+ __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
+ (U), (R))
+
#define _mm_rcp28_round_ss(A, B, R) \
__builtin_ia32_rcp28ss_round(A, B, R)
+#define _mm_mask_rcp28_round_ss(W, U, A, B, R) \
+ __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
+
+#define _mm_maskz_rcp28_round_ss(U, A, B, R) \
+ __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
+ (U), (R))
+
#define _mm_rsqrt28_round_sd(A, B, R) \
__builtin_ia32_rsqrt28sd_round(A, B, R)
+#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \
+ __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
+
+#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \
+ __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\
+ (U), (R))
+
#define _mm_rsqrt28_round_ss(A, B, R) \
__builtin_ia32_rsqrt28ss_round(A, B, R)
+#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \
+ __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
+
+#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \
+ __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\
+ (U), (R))
+
#endif
+#define _mm_mask_rcp28_sd(W, U, A, B)\
+ _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_sd(U, A, B)\
+ _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rcp28_ss(W, U, A, B)\
+ _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_ss(U, A, B)\
+ _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(W, U, A, B)\
+ _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(U, A, B)\
+ _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(W, U, A, B)\
+ _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(U, A, B)\
+ _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
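[Annotation: the masked rcp28/rsqrt28 forms follow the usual write-mask convention, merging into W or zeroing. Sketch only; assumes an AVX512ER target such as Knights Landing.]

    #include <immintrin.h>

    __m128d
    rcp_or_keep (__m128d w, __mmask8 m, __m128d a, __m128d b)
    {
      /* Low lane: 2^-28-accurate reciprocal of b[0] if m[0] is set,
         otherwise w[0]; high lane copied from a.  */
      return _mm_mask_rcp28_round_sd (w, m, a, b, _MM_FROUND_NO_EXC);
    }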
#define _mm512_exp2a23_pd(A) \
_mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 729d568..6342fde 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -2124,6 +2124,18 @@ _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__v4sf) _mm_setzero_ps (), U, C)
#endif
+#define _mm_mask_sqrt_sd(W, U, A, B) \
+ _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_sqrt_sd(U, A, B) \
+ _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_sqrt_ss(W, U, A, B) \
+ _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_sqrt_ss(U, A, B) \
+ _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
@@ -3259,6 +3271,18 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__v4sf)_mm_setzero_ps (), -1, C)
#endif
+#define _mm_mask_scalef_sd(W, U, A, B) \
+ _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_scalef_sd(U, A, B) \
+ _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_scalef_ss(W, U, A, B) \
+ _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_scalef_ss(U, A, B) \
+ _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -8621,6 +8645,30 @@ _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
__R);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
+ __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
+ (__v2df) __B,
+ _mm_setzero_ps (),
+ __U,
+ __R);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
@@ -8629,6 +8677,30 @@ _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
(__v4sf) __B,
__R);
}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
+ __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
+ (__v4sf) __B,
+ _mm_setzero_pd (),
+ __U,
+ __R);
+}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
@@ -8642,10 +8714,37 @@ _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
#define _mm_cvt_roundsd_ss(A, B, C) \
(__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
+#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
+
+#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \
+ (U), (C))
+
#define _mm_cvt_roundss_sd(A, B, C) \
(__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
+
+#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
+
+#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \
+ (U), (C))
+
#endif
+#define _mm_mask_cvtss_sd(W, U, A, B) \
+ _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_cvtss_sd(U, A, B) \
+ _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_cvtsd_ss(W, U, A, B) \
+ _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_cvtsd_ss(U, A, B) \
+ _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
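[Annotation: a sketch of the new masked scalar conversions (assumes an AVX512F target).]

    #include <immintrin.h>

    __m128
    cvt_low (__m128 w, __mmask8 m, __m128 a, __m128d b)
    {
      /* Convert b[0] (double) to float under the given rounding mode;
         merge into w under m, upper three floats taken from a.  */
      return _mm_mask_cvt_roundsd_ss (w, m, a, b,
                                      _MM_FROUND_TO_NEAREST_INT
                                      | _MM_FROUND_NO_EXC);
    }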
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_si512 (__m512i * __P, __m512i __A)
@@ -14265,6 +14364,14 @@ _mm_cvttss_i64 (__m128 __A)
}
#endif /* __x86_64__ */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsi512_si32 (__m512i __A)
+{
+ __v16si __B = (__v16si) __A;
+ return __B[0];
+}
+
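[Annotation: this fills a gap alongside `_mm_cvtsi128_si32` (a 256-bit variant is added later in this patch). Trivial use, sketch only.]

    #include <immintrin.h>

    int
    low_element (__m512i v)
    {
      /* Element 0 of the vector; typically lowered to a single vmovd.  */
      return _mm512_cvtsi512_si32 (v);
    }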
extern __inline unsigned
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_u32 (__m128 __A)
@@ -14289,6 +14396,34 @@ _mm_cvttss_i32 (__m128 __A)
_MM_FROUND_CUR_DIRECTION);
}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sd (__m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
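[Annotation: the `_mm_cvti32_*` spellings mirror the existing `_mm_cvtsi32_*` intrinsics under the names used by other compilers. Sketch:]

    #include <immintrin.h>

    __m128d
    insert_int (__m128d upper, int x)
    {
      /* (double) x goes into the low lane; high lane kept from upper.  */
      return _mm_cvti32_sd (upper, x);
    }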
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -14315,6 +14450,34 @@ _mm_cvttsd_i64 (__m128d __A)
return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
_MM_FROUND_CUR_DIRECTION);
}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sd (__m128d __A, long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
#endif /* __x86_64__ */
extern __inline unsigned
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index cd4275e..b4b1d7f 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -34,6 +34,15 @@
#define __DISABLE_AVX512VLBW__
#endif /* __AVX512VLBW__ */
+/* Internal data types for implementing the intrinsics. */
+typedef short __v16hi_u __attribute__ ((__vector_size__ (32), \
+ __may_alias__, __aligned__ (1)));
+typedef short __v8hi_u __attribute__ ((__vector_size__ (16), \
+ __may_alias__, __aligned__ (1)));
+typedef char __v32qi_u __attribute__ ((__vector_size__ (32), \
+ __may_alias__, __aligned__ (1)));
+typedef char __v16qi_u __attribute__ ((__vector_size__ (16), \
+ __may_alias__, __aligned__ (1)));
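[Annotation: the `__aligned__ (1)` + `__may_alias__` pair is what makes the plain dereferences below both well-defined and lowered to unaligned moves. The same idiom works outside the header; sketch in plain GNU C:]

    /* An alignment-1, may_alias vector type: *p is a valid access at any
       address, and GCC emits an unaligned load (e.g. vmovdqu) for it.  */
    typedef short v8hi_u __attribute__ ((__vector_size__ (16),
                                         __may_alias__, __aligned__ (1)));

    static v8hi_u
    load8_shorts (const void *p)
    {
      return *(const v8hi_u *) p;
    }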
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -75,6 +84,13 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi8 (void *__P, __m256i __A)
+{
+ *(__v32qi_u *) __P = (__v32qi_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
{
__builtin_ia32_storedquqi256_mask ((char *) __P,
@@ -84,6 +100,13 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi8 (void *__P, __m128i __A)
+{
+ *(__v16qi_u *) __P = (__v16qi_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
{
__builtin_ia32_storedquqi128_mask ((char *) __P,
@@ -93,6 +116,13 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_epi16 (void const *__P)
+{
+ return (__m256i) (*(__v16hi_u *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
{
return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
@@ -112,6 +142,13 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_epi16 (void const *__P)
+{
+ return (__m128i) (*(__v8hi_u *) __P);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
{
return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
@@ -170,6 +207,13 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_epi8 (void const *__P)
+{
+ return (__m256i) (*(__v32qi_u *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
{
return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
@@ -189,6 +233,13 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_epi8 (void const *__P)
+{
+ return (__m128i) (*(__v16qi_u *) __P);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
{
return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
@@ -3710,6 +3761,13 @@ _mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_epi16 (void *__P, __m256i __A)
+{
+ *(__v16hi_u *) __P = (__v16hi_u) __A;
+}
+
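[Annotation: usage of the new unmasked forms is direct. Sketch; needs -mavx512bw -mavx512vl since they live in this header.]

    #include <immintrin.h>

    void
    copy_16_shorts (short *dst, const short *src)
    {
      /* Neither pointer needs 32-byte alignment.  */
      _mm256_storeu_epi16 (dst, _mm256_loadu_epi16 (src));
    }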
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
{
__builtin_ia32_storedquhi256_mask ((short *) __P,
@@ -3719,6 +3777,13 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_epi16 (void *__P, __m128i __A)
+{
+ *(__v8hi_u *) __P = (__v8hi_u) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
{
__builtin_ia32_storedquhi128_mask ((short *) __P,
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 7abd601..99666c7 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -36,6 +36,14 @@
/* Internal data types for implementing the intrinsics. */
typedef unsigned int __mmask32;
+typedef int __v4si_u __attribute__ ((__vector_size__ (16), \
+ __may_alias__, __aligned__ (1)));
+typedef int __v8si_u __attribute__ ((__vector_size__ (32), \
+ __may_alias__, __aligned__ (1)));
+typedef long long __v2di_u __attribute__ ((__vector_size__ (16), \
+ __may_alias__, __aligned__ (1)));
+typedef long long __v4di_u __attribute__ ((__vector_size__ (32), \
+ __may_alias__, __aligned__ (1)));
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -265,6 +273,13 @@ _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_epi64 (void const *__P)
+{
+ return (__m256i) (*(__v4di *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
@@ -286,6 +301,13 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_epi64 (void const *__P)
+{
+ return (__m128i) (*(__v2di *) __P);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
@@ -363,6 +385,13 @@ _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_epi32 (void const *__P)
+{
+ return (__m256i) (*(__v8si *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
@@ -384,6 +413,13 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_epi32 (void const *__P)
+{
+ return (__m128i) (*(__v4si *) __P);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
@@ -405,6 +441,13 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_epi32 (void *__P, __m256i __A)
+{
+ *(__v8si *) __P = (__v8si) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
{
__builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
@@ -414,6 +457,13 @@ _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_epi32 (void *__P, __m128i __A)
+{
+ *(__v4si *) __P = (__v4si) __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
{
__builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
@@ -719,6 +769,13 @@ _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_epi64 (void const *__P)
+{
+ return (__m256i) (*(__v4di_u *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
{
return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
@@ -738,6 +795,13 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_epi64 (void const *__P)
+{
+ return (__m128i) (*(__v2di_u *) __P);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
{
return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
@@ -789,6 +853,13 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_epi32 (void const *__P)
+{
+ return (__m256i) (*(__v8si_u *) __P);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
{
return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
@@ -808,6 +879,13 @@ _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_epi32 (void const *__P)
+{
+ return (__m128i) (*(__v4si_u *) __P);
+}
+
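[Annotation: the `load`/`loadu` split matches the rest of the SSE/AVX load families — the aligned forms dereference the natively aligned vector types, the `u` forms the alignment-1 typedefs. Sketch; assumes -mavx512vl.]

    #include <immintrin.h>

    __m256i
    add_rows (const int *aligned32, const int *anywhere)
    {
      __m256i a = _mm256_load_epi32 (aligned32);   /* 32-byte aligned  */
      __m256i b = _mm256_loadu_epi32 (anywhere);   /* no requirement   */
      return _mm256_add_epi32 (a, b);
    }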
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
{
return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
@@ -13730,6 +13808,13 @@ _mm256_permutex_pd (__m256d __X, const int __M)
#endif
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A))
+#define _mm256_mask_cvt_roundps_ph(A, B, C, D) \
+ _mm256_mask_cvtps_ph ((A), (B), (C), (D))
+#define _mm256_maskz_cvt_roundps_ph(A, B, C) \
+ _mm256_maskz_cvtps_ph ((A), (B), (C))
+#define _mm_mask_cvt_roundps_ph(A, B, C, D) \
+ _mm_mask_cvtps_ph ((A), (B), (C), (D))
+#define _mm_maskz_cvt_roundps_ph(A, B, C) _mm_maskz_cvtps_ph ((A), (B), (C))
#ifdef __DISABLE_AVX512VL__
#undef __DISABLE_AVX512VL__
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index 60cb52c..f368d83 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -1,3 +1,26 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h
index 26eee36..f657840 100644
--- a/gcc/config/i386/avx512vp2intersectvlintrin.h
+++ b/gcc/config/i386/avx512vp2intersectvlintrin.h
@@ -1,3 +1,26 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 22b2bae..fd5cf6a 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -444,6 +444,13 @@ _mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
(__v4sf)(__m128)(Y), (int)(P)))
#endif
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsi256_si32 (__m256i __A)
+{
+ __v8si __B = (__v8si) __A;
+ return __B[0];
+}
+
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_pd (__m128i __A)
{
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
index c5de9eb..9fdd08c 100644
--- a/gcc/config/i386/bmi2intrin.h
+++ b/gcc/config/i386/bmi2intrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _BMI2INTRIN_H_INCLUDED
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 8ba6e5b..5bd712a 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _BMIINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/cetintrin.h b/gcc/config/i386/cetintrin.h
index 095bbe0..81c4d72 100644
--- a/gcc/config/i386/cetintrin.h
+++ b/gcc/config/i386/cetintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <cetintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CETINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/cldemoteintrin.h b/gcc/config/i386/cldemoteintrin.h
index 8c0feca..0c31c35 100644
--- a/gcc/config/i386/cldemoteintrin.h
+++ b/gcc/config/i386/cldemoteintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <cldemoteintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLDEMOTE_H_INCLUDED
diff --git a/gcc/config/i386/clflushoptintrin.h b/gcc/config/i386/clflushoptintrin.h
index 037f044..a3697f0 100644
--- a/gcc/config/i386/clflushoptintrin.h
+++ b/gcc/config/i386/clflushoptintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/clwbintrin.h b/gcc/config/i386/clwbintrin.h
index 84d0939..3f83962 100644
--- a/gcc/config/i386/clwbintrin.h
+++ b/gcc/config/i386/clwbintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLWBINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index af37f5c..0b902d5 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -183,6 +183,10 @@
"@internal Memory operand without REX prefix."
(match_operand 0 "norex_memory_operand"))
+(define_special_memory_constraint "Br"
+ "@internal bcst memory operand."
+ "@internal Broadcast memory operand."
+
(define_constraint "Bs"
"@internal Sibcall memory operand."
(ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index bca61d62..22d284e 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -26,6 +26,7 @@
/* %eax */
#define bit_AVX512BF16 (1 << 5)
+#define bit_HRESET (1 << 22)
/* %ecx */
#define bit_SSE3 (1 << 0)
@@ -124,9 +125,13 @@
#define bit_AVX5124FMAPS (1 << 3)
#define bit_AVX512VP2INTERSECT (1 << 8)
#define bit_IBT (1 << 20)
+#define bit_UINTR (1 << 5)
#define bit_PCONFIG (1 << 18)
#define bit_SERIALIZE (1 << 14)
#define bit_TSXLDTRK (1 << 16)
+#define bit_AMX_BF16 (1 << 22)
+#define bit_AMX_TILE (1 << 24)
+#define bit_AMX_INT8 (1 << 25)
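[Annotation: these are CPUID leaf-7 subleaf-0 %edx bits; a detection sketch using the `__get_cpuid_count` helper this header already provides.]

    #include <cpuid.h>

    static int
    has_amx_tile (void)
    {
      unsigned int eax, ebx, ecx, edx;
      if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
        return 0;
      return (edx & bit_AMX_TILE) != 0;
    }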
/* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */
#define bit_BNDREGS (1 << 3)
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 545d3bf..8ff240e 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -715,6 +715,19 @@ _mm_loadu_si64 (void const *__P)
return _mm_loadl_epi64 ((__m128i_u *)__P);
}
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_si32 (void const *__P)
+{
+ return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_si16 (void const *__P)
+{
+ return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]);
+}
+
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_si128 (__m128i *__P, __m128i __B)
{
@@ -739,6 +752,18 @@ _mm_storeu_si64 (void *__P, __m128i __B)
_mm_storel_epi64 ((__m128i_u *)__P, __B);
}
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_si32 (void *__P, __m128i __B)
+{
+ *(__m32_u *)__P = (__m32) ((__v4si)__B)[0];
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_si16 (void *__P, __m128i __B)
+{
+ *(__m16_u *)__P = (__m16) ((__v8hi)__B)[0];
+}
+
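[Annotation: together with `_mm_loadu_si16`/`_mm_loadu_si32` above, this completes the narrow unaligned scalar-to-vector moves (the load lands in lane 0, matching the store). Round-trip sketch:]

    #include <emmintrin.h>

    void
    copy4 (void *dst, const void *src)
    {
      /* Loads 4 bytes into lane 0 (upper lanes zeroed), stores only
         the low 4 bytes back; no alignment assumed on either side.  */
      _mm_storeu_si32 (dst, _mm_loadu_si32 (src));
    }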
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_pi64 (__m128i __B)
{
diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h
index 4b2efcb..dcb6507 100644
--- a/gcc/config/i386/enqcmdintrin.h
+++ b/gcc/config/i386/enqcmdintrin.h
@@ -21,12 +21,12 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <enqcmdntrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
#endif
-#ifndef _ENQCMDNTRIN_H_INCLUDED
-#define _ENQCMDNTRIN_H_INCLUDED
+#ifndef _ENQCMDINTRIN_H_INCLUDED
+#define _ENQCMDINTRIN_H_INCLUDED
#ifndef __ENQCMD__
#pragma GCC push_options
@@ -52,4 +52,4 @@ _enqcmds (void * __P, const void * __Q)
#undef __DISABLE_ENQCMD__
#pragma GCC pop_options
#endif /* __DISABLE_ENQCMD__ */
-#endif /* _ENQCMDNTRIN_H_INCLUDED. */
+#endif /* _ENQCMDINTRIN_H_INCLUDED. */
diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h
index fde05a7..6e059df 100644
--- a/gcc/config/i386/fxsrintrin.h
+++ b/gcc/config/i386/fxsrintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _FXSRINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/hresetintrin.h b/gcc/config/i386/hresetintrin.h
new file mode 100644
index 0000000..bdbe253
--- /dev/null
+++ b/gcc/config/i386/hresetintrin.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _X86GPRINTRIN_H_INCLUDED
+# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
+#endif
+
+#ifndef _HRESETINTRIN_H_INCLUDED
+#define _HRESETINTRIN_H_INCLUDED
+
+#ifndef __HRESET__
+#pragma GCC push_options
+#pragma GCC target ("hreset")
+#define __DISABLE_HRESET__
+#endif /* __HRESET__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_hreset (unsigned int __EAX)
+{
+ __builtin_ia32_hreset (__EAX);
+}
+
+#ifdef __DISABLE_HRESET__
+#undef __DISABLE_HRESET__
+#pragma GCC pop_options
+#endif /* __DISABLE_HRESET__ */
+#endif /* _HRESETINTRIN_H_INCLUDED. */
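[Annotation: usage is a single call. Sketch; requires -mhreset, and each set bit of the argument requests reset of the corresponding history component enabled in IA32_HRESET_ENABLE.]

    #include <x86gprintrin.h>

    void
    reset_history (unsigned int mask)
    {
      _hreset (mask);
    }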
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 1adf7c4..964633d 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -194,6 +194,7 @@ DEF_FUNCTION_TYPE (UNSIGNED)
DEF_FUNCTION_TYPE (UINT)
DEF_FUNCTION_TYPE (USHORT)
DEF_FUNCTION_TYPE (INT)
+DEF_FUNCTION_TYPE (UINT8)
DEF_FUNCTION_TYPE (VOID)
DEF_FUNCTION_TYPE (PVOID)
@@ -443,6 +444,7 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, UQI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, UQI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT, UQI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, UQI)
@@ -452,6 +454,7 @@ DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, UHI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, UHI, INT)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, UHI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT)
@@ -1026,8 +1029,10 @@ DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, UQI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, UQI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, UQI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, UQI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, INT)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index fec5cef..882cba5 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -452,6 +452,14 @@ BDESC (0, OPTION_MASK_ISA2_SERIALIZE, CODE_FOR_serialize, "__builtin_ia32_serial
BDESC (0, OPTION_MASK_ISA2_TSXLDTRK, CODE_FOR_xsusldtrk, "__builtin_ia32_xsusldtrk", IX86_BUILTIN_XSUSLDTRK, UNKNOWN, (int) VOID_FTYPE_VOID)
BDESC (0, OPTION_MASK_ISA2_TSXLDTRK, CODE_FOR_xresldtrk, "__builtin_ia32_xresldtrk", IX86_BUILTIN_XRESLDTRK, UNKNOWN, (int) VOID_FTYPE_VOID)
+/* UINTR. */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_clui, "__builtin_ia32_clui", IX86_BUILTIN_CLUI, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_stui, "__builtin_ia32_stui", IX86_BUILTIN_STUI, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_senduipi, "__builtin_ia32_senduipi", IX86_BUILTIN_SENDUIPI, UNKNOWN, (int) VOID_FTYPE_UINT64)
+
+/* HRESET. */
+BDESC (0, OPTION_MASK_ISA2_HRESET, CODE_FOR_hreset, "__builtin_ia32_hreset", IX86_BUILTIN_HRESET, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
+
BDESC_END (SPECIAL_ARGS, ARGS)
/* Builtins with variable number of arguments. */
@@ -2772,10 +2780,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_r
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
@@ -2911,13 +2921,21 @@ BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_exp2v16sf_mask_round, "__b
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_mask_round, "__builtin_ia32_rcp28sd_mask_round", IX86_BUILTIN_RCP28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_mask_round, "__builtin_ia32_rcp28ss_mask_round", IX86_BUILTIN_RCP28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_mask_round, "__builtin_ia32_rsqrt28sd_mask_round", IX86_BUILTIN_RSQRT28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_mask_round, "__builtin_ia32_rsqrt28ss_mask_round", IX86_BUILTIN_RSQRT28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
/* AVX512DQ. */
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index ca7a870..504987a 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -1194,6 +1194,11 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_tpause",
UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE);
+ /* UINTR. */
+ def_builtin (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR,
+ "__builtin_ia32_testui",
+ UINT8_FTYPE_VOID, IX86_BUILTIN_TESTUI);
+
/* CLDEMOTE. */
def_builtin (0, OPTION_MASK_ISA2_CLDEMOTE, "__builtin_ia32_cldemote",
VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE);
diff --git a/gcc/config/i386/i386-builtins.h b/gcc/config/i386/i386-builtins.h
index cc6a8ce..a88cc0c 100644
--- a/gcc/config/i386/i386-builtins.h
+++ b/gcc/config/i386/i386-builtins.h
@@ -40,6 +40,7 @@ enum ix86_builtins
IX86_BUILTIN_UMONITOR,
IX86_BUILTIN_UMWAIT,
IX86_BUILTIN_TPAUSE,
+ IX86_BUILTIN_TESTUI,
IX86_BUILTIN_CLZERO,
IX86_BUILTIN_CLDEMOTE,
IX86_BUILTIN_VEC_INIT_V2SI,
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 3553a37..bbe9ac5 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -588,6 +588,20 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__ENQCMD__");
if (isa_flag2 & OPTION_MASK_ISA2_TSXLDTRK)
def_or_undef (parse_in, "__TSXLDTRK__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_TILE)
+ def_or_undef (parse_in, "__AMX_TILE__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_INT8)
+ def_or_undef (parse_in, "__AMX_INT8__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16)
+ def_or_undef (parse_in, "__AMX_BF16__");
+ if (isa_flag & OPTION_MASK_ISA_SAHF)
+ def_or_undef (parse_in, "__LAHF_SAHF__");
+ if (isa_flag2 & OPTION_MASK_ISA2_MOVBE)
+ def_or_undef (parse_in, "__MOVBE__");
+ if (isa_flag2 & OPTION_MASK_ISA2_UINTR)
+ def_or_undef (parse_in, "__UINTR__");
+ if (isa_flag2 & OPTION_MASK_ISA2_HRESET)
+ def_or_undef (parse_in, "__HRESET__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index e6f8b31..3e8afe6 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -1045,7 +1045,8 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
rtx src2 = operands[2];
/* Both source operands cannot be in memory. */
- if (MEM_P (src1) && MEM_P (src2))
+ if ((MEM_P (src1) || bcst_mem_operand (src1, mode))
+ && (MEM_P (src2) || bcst_mem_operand (src2, mode)))
return false;
/* Canonicalize operand order for commutative operators. */
@@ -3525,6 +3526,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
machine_mode mode = GET_MODE (dest);
machine_mode cmpmode = GET_MODE (cmp);
+ /* Simplify trivial VEC_COND_EXPR to avoid ICE in pr97506. */
+ if (rtx_equal_p (op_true, op_false))
+ {
+ emit_move_insn (dest, op_true);
+ return;
+ }
+
/* In AVX512F the result of comparison is an integer mask. */
bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode);
@@ -10225,12 +10233,16 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
+ case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
+ case V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT:
nargs = 5;
break;
case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
+ case V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT:
+ case V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT:
nargs_constant = 4;
nargs = 5;
break;
@@ -10413,6 +10425,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case USHORT_FTYPE_VOID:
case UINT64_FTYPE_VOID:
case UINT_FTYPE_VOID:
+ case UINT8_FTYPE_VOID:
case UNSIGNED_FTYPE_VOID:
nargs = 0;
klass = load;
@@ -11203,6 +11216,19 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
return target;
+ case IX86_BUILTIN_TESTUI:
+ emit_insn (gen_testui ());
+
+ if (target == 0
+ || !register_operand (target, QImode))
+ target = gen_reg_rtx (QImode);
+
+ pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (target, pat));
+
+ return target;
+
case IX86_BUILTIN_CLZERO:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
@@ -12807,6 +12833,14 @@ rdseed_step:
emit_insn (gen_incssp (mode, op0));
return 0;
+ case IX86_BUILTIN_HRESET:
+ icode = CODE_FOR_hreset;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ op0 = force_reg (SImode, op0);
+ emit_insn (gen_hreset (op0));
+ return 0;
+
case IX86_BUILTIN_RSTORSSP:
case IX86_BUILTIN_CLRSSBSY:
arg0 = CALL_EXPR_ARG (exp, 0);
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index 2fabd20..82c8091 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -209,7 +209,12 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavx512bf16", OPTION_MASK_ISA2_AVX512BF16 },
{ "-menqcmd", OPTION_MASK_ISA2_ENQCMD },
{ "-mserialize", OPTION_MASK_ISA2_SERIALIZE },
- { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK }
+ { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK },
+ { "-mamx-tile", OPTION_MASK_ISA2_AMX_TILE },
+ { "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 },
+ { "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 },
+ { "-muintr", OPTION_MASK_ISA2_UINTR },
+ { "-mhreset", OPTION_MASK_ISA2_HRESET }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1028,11 +1033,16 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
+ IX86_ATTR_ISA ("uintr", OPT_muintr),
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
IX86_ATTR_ISA ("enqcmd", OPT_menqcmd),
IX86_ATTR_ISA ("serialize", OPT_mserialize),
IX86_ATTR_ISA ("tsxldtrk", OPT_mtsxldtrk),
+ IX86_ATTR_ISA ("amx-tile", OPT_mamx_tile),
+ IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8),
+ IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16),
+ IX86_ATTR_ISA ("hreset", OPT_mhreset),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -1893,6 +1903,9 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_stringop_alg = no_stringop;
}
+ if (TARGET_UINTR && !TARGET_64BIT)
+ error ("%<-muintr%> not supported for 32-bit code");
+
if (!opts->x_ix86_arch_string)
opts->x_ix86_arch_string
= TARGET_64BIT_P (opts->x_ix86_isa_flags)
@@ -2052,10 +2065,27 @@ ix86_option_override_internal (bool main_args_p,
return false;
}
+ /* The feature-only micro-architecture levels that use
+ PTA_NO_TUNE are only defined for the x86-64 psABI. */
+ if ((processor_alias_table[i].flags & PTA_NO_TUNE) != 0
+ && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
+ || opts->x_ix86_abi != SYSV_ABI))
+ {
+ error (G_("%<%s%> architecture level is only defined"
+ " for the x86-64 psABI"), opts->x_ix86_arch_string);
+ return false;
+ }
+
ix86_schedule = processor_alias_table[i].schedule;
ix86_arch = processor_alias_table[i].processor;
- /* Default cpu tuning to the architecture. */
- ix86_tune = ix86_arch;
+
+ /* Default cpu tuning to the architecture, unless the table
+ entry requests not to do this. Used by the x86-64 psABI
+ micro-architecture levels. */
+ if ((processor_alias_table[i].flags & PTA_NO_TUNE) == 0)
+ ix86_tune = ix86_arch;
+ else
+ ix86_tune = PROCESSOR_GENERIC;
if (((processor_alias_table[i].flags & PTA_MMX) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
@@ -2258,6 +2288,18 @@ ix86_option_override_internal (bool main_args_p,
&& !(opts->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA2_AVX512BF16))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512BF16;
+ if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_TILE))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE;
+ if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_INT8))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8;
+ if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AMX_BF16))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16;
if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI;
@@ -2366,7 +2408,8 @@ ix86_option_override_internal (bool main_args_p,
ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
for (i = 0; i < pta_size; i++)
- if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
+ if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)
+ && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0)
{
ix86_schedule = processor_alias_table[i].schedule;
ix86_tune = processor_alias_table[i].processor;
@@ -2410,8 +2453,9 @@ ix86_option_override_internal (bool main_args_p,
auto_vec <const char *> candidates;
for (i = 0; i < pta_size; i++)
- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
- || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
+ if ((!TARGET_64BIT_P (opts->x_ix86_isa_flags)
+ || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
+ && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0)
candidates.safe_push (processor_alias_table[i].name);
#ifdef HAVE_LOCAL_CPU_DETECT
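
The PTA_NO_TUNE handling above means -march=x86-64-v2 and friends select a psABI feature level without pinning the tuning, which falls back to generic. A quick probe of what the v2 level enables (sketch; compile with gcc -march=x86-64-v2):

#include <stdio.h>

int
main (void)
{
#if defined __SSE4_2__ && !defined __AVX__
  puts ("x86-64-v2: SSE4.2 is on, AVX is not (AVX arrives with v3)");
#endif
  return 0;
}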
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c890a73..502d240 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1484,7 +1484,7 @@ ix86_reg_parm_stack_space (const_tree fndecl)
bool
ix86_libc_has_function (enum function_class fn_class)
{
- return targetm.libc_has_function (fn_class);
+ return targetm.libc_has_function (fn_class, NULL_TREE);
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
@@ -13098,6 +13098,43 @@ ix86_print_operand (FILE *file, rtx x, int code)
fputs (dstr, file);
}
+ /* Print bcst_mem_operand. */
+ else if (GET_CODE (x) == VEC_DUPLICATE)
+ {
+ machine_mode vmode = GET_MODE (x);
+ /* Must be a bcst_mem_operand. */
+ gcc_assert (bcst_mem_operand (x, vmode));
+
+ rtx mem = XEXP (x, 0);
+ ix86_print_operand (file, mem, 0);
+
+ switch (vmode)
+ {
+ case E_V2DImode:
+ case E_V2DFmode:
+ fputs ("{1to2}", file);
+ break;
+ case E_V4SImode:
+ case E_V4SFmode:
+ case E_V4DImode:
+ case E_V4DFmode:
+ fputs ("{1to4}", file);
+ break;
+ case E_V8SImode:
+ case E_V8SFmode:
+ case E_V8DFmode:
+ case E_V8DImode:
+ fputs ("{1to8}", file);
+ break;
+ case E_V16SFmode:
+ case E_V16SImode:
+ fputs ("{1to16}", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
else
{
/* We have patterns that allow zero sets of memory, for instance.
@@ -15131,11 +15168,32 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
/* Codes needing carry flag. */
case GEU: /* CF=0 */
case LTU: /* CF=1 */
+ rtx geu;
/* Detect overflow checks. They need just the carry flag. */
if (GET_CODE (op0) == PLUS
&& (rtx_equal_p (op1, XEXP (op0, 0))
|| rtx_equal_p (op1, XEXP (op0, 1))))
return CCCmode;
+ /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
+ Match LTU of op0
+ (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
+ and op1
+ (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
+ where CC_CCC is either CC or CCC. */
+ else if (code == LTU
+ && GET_CODE (op0) == NEG
+ && GET_CODE (geu = XEXP (op0, 0)) == GEU
+ && REG_P (XEXP (geu, 0))
+ && (GET_MODE (XEXP (geu, 0)) == CCCmode
+ || GET_MODE (XEXP (geu, 0)) == CCmode)
+ && REGNO (XEXP (geu, 0)) == FLAGS_REG
+ && XEXP (geu, 1) == const0_rtx
+ && GET_CODE (op1) == LTU
+ && REG_P (XEXP (op1, 0))
+ && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
+ && REGNO (XEXP (op1, 0)) == FLAGS_REG
+ && XEXP (op1, 1) == const0_rtx)
+ return CCCmode;
else
return CCmode;
case GTU: /* CF=0 & ZF=0 */
@@ -19749,33 +19807,56 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case COMPARE:
- if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
- && XEXP (XEXP (x, 0), 1) == const1_rtx
- && CONST_INT_P (XEXP (XEXP (x, 0), 2))
- && XEXP (x, 1) == const0_rtx)
+ rtx op0, op1;
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == ZERO_EXTRACT
+ && XEXP (op0, 1) == const1_rtx
+ && CONST_INT_P (XEXP (op0, 2))
+ && op1 == const0_rtx)
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
- mode = GET_MODE (XEXP (XEXP (x, 0), 0));
+ mode = GET_MODE (XEXP (op0, 0));
*total = (cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
+ + rtx_cost (XEXP (op0, 0), mode, outer_code,
opno, speed)
+ rtx_cost (const1_rtx, mode, outer_code, opno, speed));
return true;
}
- if (GET_CODE (XEXP (x, 0)) == PLUS
- && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
+ if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
{
/* This is an overflow detection, count it as a normal compare. */
- *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
- COMPARE, 0, speed);
+ *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
+ return true;
+ }
+
+ rtx geu;
+ /* Match x
+ (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
+ (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
+ if (mode == CCCmode
+ && GET_CODE (op0) == NEG
+ && GET_CODE (geu = XEXP (op0, 0)) == GEU
+ && REG_P (XEXP (geu, 0))
+ && (GET_MODE (XEXP (geu, 0)) == CCCmode
+ || GET_MODE (XEXP (geu, 0)) == CCmode)
+ && REGNO (XEXP (geu, 0)) == FLAGS_REG
+ && XEXP (geu, 1) == const0_rtx
+ && GET_CODE (op1) == LTU
+ && REG_P (XEXP (op1, 0))
+ && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
+ && REGNO (XEXP (op1, 0)) == FLAGS_REG
+ && XEXP (op1, 1) == const0_rtx)
+ {
+ /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
+ *total = 0;
return true;
}
/* The embedded comparison operand is completely free. */
- if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
- && XEXP (x, 1) == const0_rtx)
+ if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
*total = 0;
return false;
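
The CCCmode and rtx-cost changes above teach the optimizers that a setcc of the carry feeding another carry-consuming add is free. A sketch of the canonical multi-word-addition source shape that benefits; the (s < a) test recovers the carry-out:

unsigned long long
add_with_carry (unsigned long long a, unsigned long long b,
                unsigned long long c)
{
  unsigned long long s = a + b;
  return s + c + (s < a);   /* carry out of a + b feeds the next add */
}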
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 92b7475..24207d0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -203,6 +203,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_SERIALIZE_P(x) TARGET_ISA2_SERIALIZE_P(x)
#define TARGET_TSXLDTRK TARGET_ISA2_TSXLDTRK
#define TARGET_TSXLDTRK_P(x) TARGET_ISA2_TSXLDTRK_P(x)
+#define TARGET_AMX_TILE TARGET_ISA2_AMX_TILE
+#define TARGET_AMX_TILE_P(x) TARGET_ISA2_AMX_TILE_P(x)
+#define TARGET_AMX_INT8 TARGET_ISA2_AMX_INT8
+#define TARGET_AMX_INT8_P(x) TARGET_ISA2_AMX_INT8_P(x)
+#define TARGET_AMX_BF16 TARGET_ISA2_AMX_BF16
+#define TARGET_AMX_BF16_P(x) TARGET_ISA2_AMX_BF16_P(x)
+#define TARGET_UINTR TARGET_ISA2_UINTR
+#define TARGET_UINTR_P(x) TARGET_ISA2_UINTR_P(x)
+#define TARGET_HRESET TARGET_ISA2_HRESET
+#define TARGET_HRESET_P(x) TARGET_ISA2_HRESET_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@@ -1262,6 +1272,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
(TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \
|| (MODE) == V8SFmode || (MODE) == V4DFmode))
+#define VALID_BCST_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode \
+ || (MODE) == SImode || (MODE) == DImode)
+
/* It is possible to write patterns to move flags; but until someone
does it, */
#define AVOID_CCMODE_COPIES
@@ -2427,7 +2441,7 @@ const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
-/* Hole after PTA_MPX was removed. */
+const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44);
const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
@@ -2466,6 +2480,21 @@ const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15);
const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16);
const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17);
const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18);
+const wide_int_bitmask PTA_AMX_TILE (0, HOST_WIDE_INT_1U << 19);
+const wide_int_bitmask PTA_AMX_INT8 (0, HOST_WIDE_INT_1U << 20);
+const wide_int_bitmask PTA_AMX_BF16 (0, HOST_WIDE_INT_1U << 21);
+const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22);
+const wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23);
+
+const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
+const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF))
+ | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3;
+const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2
+ | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
+ | PTA_MOVBE | PTA_XSAVE;
+const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3
+ | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL;
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -2499,9 +2528,10 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT;
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
- | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK;
+ | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
+ | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR;
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
- | PTA_WAITPKG | PTA_SERIALIZE;
+ | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET;
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
| PTA_AVX512F | PTA_AVX512CD;
const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 93aae81..8730816 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -186,6 +186,10 @@
;; IRET support
UNSPEC_INTERRUPT_RETURN
+
+ ;; For MOVDIRI and MOVDIR64B support
+ UNSPEC_MOVDIRI
+ UNSPEC_MOVDIR64B
])
(define_c_enum "unspecv" [
@@ -280,10 +284,6 @@
UNSPECV_SETSSBSY
UNSPECV_CLRSSBSY
- ;; For MOVDIRI and MOVDIR64B support
- UNSPECV_MOVDIRI
- UNSPECV_MOVDIR64B
-
;; For TSXLDTRK support
UNSPECV_XSUSLDTRK
UNSPECV_XRESLDTRK
@@ -293,6 +293,12 @@
UNSPECV_UMONITOR
UNSPECV_TPAUSE
+ ;; For UINTR support
+ UNSPECV_CLUI
+ UNSPECV_STUI
+ UNSPECV_TESTUI
+ UNSPECV_SENDUIPI
+
;; For CLDEMOTE support
UNSPECV_CLDEMOTE
@@ -310,6 +316,9 @@
;; For patchable area support
UNSPECV_PATCHABLE_AREA
+
+ ;; For HRESET support
+ UNSPECV_HRESET
])
;; Constants to represent rounding modes in the ROUND instruction
@@ -7039,6 +7048,20 @@
(set (match_operand:SWI48 0 "register_operand")
(minus:SWI48 (match_dup 1) (match_dup 2)))])]
"ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
+
+(define_mode_iterator CC_CCC [CC CCC])
+
+;; Pre-reload splitter to optimize
+;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
+;; operand and no intervening flags modifications into nothing.
+(define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
+ (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)])
;; Overflow setting add instructions
@@ -13666,7 +13689,7 @@
(unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
"reload_completed"
{
- return TARGET_64BIT ? "iretq" : "iret";
+ return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
})
;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
@@ -21531,17 +21554,17 @@
;; MOVDIRI and MOVDIR64B
(define_insn "movdiri<mode>"
- [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m")
- (match_operand:SWI48 1 "register_operand" "r")]
- UNSPECV_MOVDIRI)]
+ [(set (match_operand:SWI48 0 "memory_operand" "=m")
+ (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
+ UNSPEC_MOVDIRI))]
"TARGET_MOVDIRI"
"movdiri\t{%1, %0|%0, %1}"
[(set_attr "type" "other")])
(define_insn "@movdir64b_<mode>"
- [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
- (match_operand:XI 1 "memory_operand")]
- UNSPECV_MOVDIR64B)]
+ [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
+ (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
+ UNSPEC_MOVDIR64B))]
"TARGET_MOVDIR64B"
"movdir64b\t{%1, %0|%0, %1}"
[(set_attr "type" "other")])
@@ -21571,6 +21594,34 @@
"enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
[(set_attr "type" "other")])
+;; UINTR
+(define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
+(define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
+
+(define_insn "<uintr>"
+ [(unspec_volatile [(const_int 0)] UINTR)]
+ "TARGET_UINTR && TARGET_64BIT"
+ "<uintr>"
+ [(set_attr "type" "other")
+ (set_attr "length" "4")])
+
+(define_insn "testui"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
+ "TARGET_UINTR && TARGET_64BIT"
+ "testui"
+ [(set_attr "type" "other")
+ (set_attr "length" "4")])
+
+(define_insn "senduipi"
+ [(unspec_volatile
+ [(match_operand:DI 0 "register_operand" "r")]
+ UNSPECV_SENDUIPI)]
+ "TARGET_UINTR && TARGET_64BIT"
+ "senduipi\t%0"
+ [(set_attr "type" "other")
+ (set_attr "length" "4")])
+
;; WAITPKG
(define_insn "umwait"
@@ -21655,6 +21706,14 @@
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
+(define_insn "hreset"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
+ UNSPECV_HRESET)]
+ "TARGET_HRESET"
+ "hreset\t{$0|0}"
+ [(set_attr "type" "other")
+ (set_attr "length" "4")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index c9f7195..e6b1695 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -788,6 +788,10 @@ mptwrite
Target Report Mask(ISA2_PTWRITE) Var(ix86_isa_flags2) Save
Support PTWRITE built-in functions and code generation.
+muintr
+Target Report Mask(ISA2_UINTR) Var(ix86_isa_flags2) Save
+Support UINTR built-in functions and code generation.
+
msgx
Target Report Mask(ISA2_SGX) Var(ix86_isa_flags2) Save
Support SGX built-in functions and code generation.
@@ -1114,4 +1118,20 @@ Support SERIALIZE built-in functions and code generation.
mtsxldtrk
Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save
-Support TSXLDTRK built-in functions and code generation.
\ No newline at end of file
+Support TSXLDTRK built-in functions and code generation.
+
+mamx-tile
+Target Report Mask(ISA2_AMX_TILE) Var(ix86_isa_flags2) Save
+Support AMX-TILE built-in functions and code generation.
+
+mamx-int8
+Target Report Mask(ISA2_AMX_INT8) Var(ix86_isa_flags2) Save
+Support AMX-INT8 built-in functions and code generation.
+
+mamx-bf16
+Target Report Mask(ISA2_AMX_BF16) Var(ix86_isa_flags2) Save
+Support AMX-BF16 built-in functions and code generation.
+
+mhreset
+Target Report Mask(ISA2_HRESET) Var(ix86_isa_flags2) Save
+Support HRESET built-in functions and code generation.
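
The -mhreset option enables the hreset pattern added in i386.md above, reached from _hreset () in the new hresetintrin.h. A sketch, assuming -mhreset:

#include <x86gprintrin.h>

void
reset_history (unsigned int features)
{
  _hreset (features);   /* EAX selects which processor history to reset */
}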
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index fd29797..3568d1f 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
#endif
/* 32bit bsf */
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index b660d0d..71eae83 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -24,6 +24,8 @@
#ifndef _IMMINTRIN_H_INCLUDED
#define _IMMINTRIN_H_INCLUDED
+#include <x86gprintrin.h>
+
#include <mmintrin.h>
#include <xmmintrin.h>
@@ -38,16 +40,6 @@
#include <wmmintrin.h>
-#include <fxsrintrin.h>
-
-#include <xsaveintrin.h>
-
-#include <xsaveoptintrin.h>
-
-#include <xsavesintrin.h>
-
-#include <xsavecintrin.h>
-
#include <avxintrin.h>
#include <avx2intrin.h>
@@ -102,217 +94,28 @@
#include <shaintrin.h>
-#include <lzcntintrin.h>
-
-#include <bmiintrin.h>
-
-#include <bmi2intrin.h>
-
#include <fmaintrin.h>
#include <f16cintrin.h>
#include <rtmintrin.h>
-#include <xtestintrin.h>
-
-#include <cetintrin.h>
-
#include <gfniintrin.h>
#include <vaesintrin.h>
#include <vpclmulqdqintrin.h>
-#include <movdirintrin.h>
-
-#include <sgxintrin.h>
-
-#include <pconfigintrin.h>
-
-#include <waitpkgintrin.h>
-
-#include <cldemoteintrin.h>
-
#include <avx512bf16vlintrin.h>
#include <avx512bf16intrin.h>
-#include <enqcmdintrin.h>
+#include <amxtileintrin.h>
-#include <serializeintrin.h>
+#include <amxint8intrin.h>
-#include <tsxldtrkintrin.h>
-
-#include <rdseedintrin.h>
+#include <amxbf16intrin.h>
#include <prfchwintrin.h>
-#include <adxintrin.h>
-
-#include <clwbintrin.h>
-
-#include <clflushoptintrin.h>
-
-#include <wbnoinvdintrin.h>
-
-#include <pkuintrin.h>
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_wbinvd (void)
-{
- __builtin_ia32_wbinvd ();
-}
-
-#ifndef __RDRND__
-#pragma GCC push_options
-#pragma GCC target("rdrnd")
-#define __DISABLE_RDRND__
-#endif /* __RDRND__ */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand16_step (unsigned short *__P)
-{
- return __builtin_ia32_rdrand16_step (__P);
-}
-
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand32_step (unsigned int *__P)
-{
- return __builtin_ia32_rdrand32_step (__P);
-}
-#ifdef __DISABLE_RDRND__
-#undef __DISABLE_RDRND__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDRND__ */
-
-#ifndef __RDPID__
-#pragma GCC push_options
-#pragma GCC target("rdpid")
-#define __DISABLE_RDPID__
-#endif /* __RDPID__ */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdpid_u32 (void)
-{
- return __builtin_ia32_rdpid ();
-}
-#ifdef __DISABLE_RDPID__
-#undef __DISABLE_RDPID__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDPID__ */
-
-#ifdef __x86_64__
-
-#ifndef __FSGSBASE__
-#pragma GCC push_options
-#pragma GCC target("fsgsbase")
-#define __DISABLE_FSGSBASE__
-#endif /* __FSGSBASE__ */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readfsbase_u32 (void)
-{
- return __builtin_ia32_rdfsbase32 ();
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readfsbase_u64 (void)
-{
- return __builtin_ia32_rdfsbase64 ();
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readgsbase_u32 (void)
-{
- return __builtin_ia32_rdgsbase32 ();
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readgsbase_u64 (void)
-{
- return __builtin_ia32_rdgsbase64 ();
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writefsbase_u32 (unsigned int __B)
-{
- __builtin_ia32_wrfsbase32 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writefsbase_u64 (unsigned long long __B)
-{
- __builtin_ia32_wrfsbase64 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writegsbase_u32 (unsigned int __B)
-{
- __builtin_ia32_wrgsbase32 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writegsbase_u64 (unsigned long long __B)
-{
- __builtin_ia32_wrgsbase64 (__B);
-}
-#ifdef __DISABLE_FSGSBASE__
-#undef __DISABLE_FSGSBASE__
-#pragma GCC pop_options
-#endif /* __DISABLE_FSGSBASE__ */
-
-#ifndef __RDRND__
-#pragma GCC push_options
-#pragma GCC target("rdrnd")
-#define __DISABLE_RDRND__
-#endif /* __RDRND__ */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand64_step (unsigned long long *__P)
-{
- return __builtin_ia32_rdrand64_step (__P);
-}
-#ifdef __DISABLE_RDRND__
-#undef __DISABLE_RDRND__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDRND__ */
-
-#endif /* __x86_64__ */
-
-#ifndef __PTWRITE__
-#pragma GCC push_options
-#pragma GCC target("ptwrite")
-#define __DISABLE_PTWRITE__
-#endif
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_ptwrite64 (unsigned long long __B)
-{
- __builtin_ia32_ptwrite64 (__B);
-}
-#endif /* __x86_64__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_ptwrite32 (unsigned __B)
-{
- __builtin_ia32_ptwrite32 (__B);
-}
-#ifdef __DISABLE_PTWRITE__
-#undef __DISABLE_PTWRITE__
-#pragma GCC pop_options
-#endif /* __DISABLE_PTWRITE__ */
-
#endif /* _IMMINTRIN_H_INCLUDED */
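
With the three amx*intrin.h headers now chained into <immintrin.h>, a minimal AMX smoke test looks like the sketch below (assuming -mamx-tile and 64-bit code; the 64-byte configuration blob follows the AMX palette layout, byte 0 selecting palette 1):

#include <immintrin.h>

void
amx_config_and_release (void)
{
  static const unsigned char cfg[64] __attribute__ ((aligned (64))) = { 1 };
  _tile_loadconfig (cfg);   /* ldtilecfg */
  _tile_release ();         /* tilerelease */
}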
diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c
index 15b5c3d..668208c 100644
--- a/gcc/config/i386/intelmic-mkoffload.c
+++ b/gcc/config/i386/intelmic-mkoffload.c
@@ -231,7 +231,7 @@ compile_for_target (struct obstack *argv_obstack)
unsetenv ("LIBRARY_PATH");
unsetenv ("LD_RUN_PATH");
- fork_execute (argv[0], argv, false);
+ fork_execute (argv[0], argv, false, NULL);
obstack_free (argv_obstack, NULL);
/* Restore environment variables. */
@@ -455,7 +455,7 @@ generate_host_descr_file (const char *host_compiler)
obstack_ptr_grow (&argv_obstack, NULL);
char **argv = XOBFINISH (&argv_obstack, char **);
- fork_execute (argv[0], argv, false);
+ fork_execute (argv[0], argv, false, NULL);
obstack_free (&argv_obstack, NULL);
return obj_filename;
@@ -538,7 +538,7 @@ prepare_target_image (const char *target_compiler, int argc, char **argv)
obstack_ptr_grow (&argv_obstack, rename_section_opt);
obstack_ptr_grow (&argv_obstack, NULL);
char **new_argv = XOBFINISH (&argv_obstack, char **);
- fork_execute (new_argv[0], new_argv, false);
+ fork_execute (new_argv[0], new_argv, false, NULL);
obstack_free (&argv_obstack, NULL);
/* Objcopy has created symbols, containing the input file name with
@@ -580,7 +580,7 @@ prepare_target_image (const char *target_compiler, int argc, char **argv)
obstack_ptr_grow (&argv_obstack, opt_for_objcopy[2]);
obstack_ptr_grow (&argv_obstack, NULL);
new_argv = XOBFINISH (&argv_obstack, char **);
- fork_execute (new_argv[0], new_argv, false);
+ fork_execute (new_argv[0], new_argv, false, NULL);
obstack_free (&argv_obstack, NULL);
return target_so_filename;
@@ -672,7 +672,7 @@ main (int argc, char **argv)
obstack_ptr_grow (&argv_obstack, out_obj_filename);
obstack_ptr_grow (&argv_obstack, NULL);
char **new_argv = XOBFINISH (&argv_obstack, char **);
- fork_execute (new_argv[0], new_argv, false);
+ fork_execute (new_argv[0], new_argv, false, NULL);
obstack_free (&argv_obstack, NULL);
/* Run objcopy on the resultant object file to localize generated symbols
@@ -688,7 +688,7 @@ main (int argc, char **argv)
obstack_ptr_grow (&argv_obstack, out_obj_filename);
obstack_ptr_grow (&argv_obstack, NULL);
new_argv = XOBFINISH (&argv_obstack, char **);
- fork_execute (new_argv[0], new_argv, false);
+ fork_execute (new_argv[0], new_argv, false, NULL);
obstack_free (&argv_obstack, NULL);
return 0;
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
index d7c3acb..0b5c8bb 100644
--- a/gcc/config/i386/lwpintrin.h
+++ b/gcc/config/i386/lwpintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _LWPINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h
index 1863a58..6d00e9f 100644
--- a/gcc/config/i386/lzcntintrin.h
+++ b/gcc/config/i386/lzcntintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
#endif
diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h
index 408e57c..0d0aa93 100644
--- a/gcc/config/i386/mingw-w64.h
+++ b/gcc/config/i386/mingw-w64.h
@@ -98,3 +98,9 @@ along with GCC; see the file COPYING3. If not see
%{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \
" LINK_SPEC_LARGE_ADDR_AWARE "\
%(shared_libgcc_undefs)"
+
+/* Enable sincos optimization, overriding cygming.h. sincos, sincosf
+ and sincosl functions are available on mingw-w64, but not on the
+ original mingw32. */
+#undef TARGET_LIBC_HAS_FUNCTION
+#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function
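
With the hook overridden, the middle end may contract a paired sin/cos of the same argument into a single sincos call on mingw-w64. Sketch (the contraction also needs optimization enabled, e.g. -O2):

#include <math.h>

void
polar (double t, double *s, double *c)
{
  *s = sin (t);   /* these two calls ...                 */
  *c = cos (t);   /* ... can fold into sincos (t, s, c)  */
}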
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 77de7ca..dff42fd 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -42,9 +42,15 @@
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
+typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__));
+typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__));
/* Unaligned version of the same type */
typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
+typedef int __m32_u __attribute__ ((__vector_size__ (4), \
+ __may_alias__, __aligned__ (1)));
+typedef short __m16_u __attribute__ ((__vector_size__ (2), \
+ __may_alias__, __aligned__ (1)));
/* Internal data types for implementing the intrinsics. */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
diff --git a/gcc/config/i386/movdirintrin.h b/gcc/config/i386/movdirintrin.h
index e7f374a..b2f8406 100644
--- a/gcc/config/i386/movdirintrin.h
+++ b/gcc/config/i386/movdirintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <movdirintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _MOVDIRINTRIN_H_INCLUDED
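
Relatedly, the movdiri/movdir64b patterns in i386.md were rewritten in this merge as plain sets with an unspec source, so the direct store becomes visible to alias analysis instead of hiding in an opaque unspec_volatile. The user-level entry point (sketch; requires -mmovdiri, and the 8-byte variant is 64-bit only):

#include <x86gprintrin.h>

void
post_doorbell (unsigned long long *mmio, unsigned long long value)
{
  _directstoreu_u64 (mmio, value);   /* emits movdiri */
}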
diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h
index d2a3261..31c493a 100644
--- a/gcc/config/i386/pconfigintrin.h
+++ b/gcc/config/i386/pconfigintrin.h
@@ -1,5 +1,28 @@
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <pconfigintrin.h> directly; include <immintrin.h> instead."
+/* Copyright (C) 2018-2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <pconfigintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _PCONFIGINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h
index 6840914..0d2dd51 100644
--- a/gcc/config/i386/pkuintrin.h
+++ b/gcc/config/i386/pkuintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <pkuintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <pkuintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _PKUINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b03f9cd..be57cda 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1081,6 +1081,19 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "vector_memory_operand")))
+(define_predicate "bcst_mem_operand"
+ (and (match_code "vec_duplicate")
+ (and (match_test "TARGET_AVX512F")
+ (ior (match_test "TARGET_AVX512VL")
+ (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
+ (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
+ (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
+
+;; Return true when OP is a vector_operand or a bcst_mem_operand.
+(define_predicate "bcst_vector_operand"
+ (ior (match_operand 0 "vector_operand")
+ (match_operand 0 "bcst_mem_operand")))
+
;; Return true when OP is either nonimmediate operand, or any
;; CONST_VECTOR.
(define_predicate "nonimmediate_or_const_vector_operand"
diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h
index efc7cea..168053a 100644
--- a/gcc/config/i386/rdseedintrin.h
+++ b/gcc/config/i386/rdseedintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <rdseedintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <rdseedintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _RDSEEDINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h
index 463a989..436e517 100644
--- a/gcc/config/i386/rtmintrin.h
+++ b/gcc/config/i386/rtmintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <rtmintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _RTMINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
index 0c35b9e..95f26d6 100644
--- a/gcc/config/i386/serializeintrin.h
+++ b/gcc/config/i386/serializeintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <serializeintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <serializeintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _SERIALIZE_H_INCLUDED
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 934b60a..52635f6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1756,8 +1756,8 @@
(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(plusminus:VF
- (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
- (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
+ (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
+ (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -1765,35 +1765,7 @@
v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "<mask_prefix3>")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*sub<mode>3<mask_name>_bcst"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (minus:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
- "TARGET_AVX512F
- && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
- && <mask_mode512bit_condition>"
- "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "sseadd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*add<mode>3<mask_name>_bcst"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (plus:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VF_AVX512 2 "register_operand" "v")))]
- "TARGET_AVX512F
- && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
- && <mask_mode512bit_condition>"
- "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "sseadd")
+ (set_attr "prefix" "<bcst_mask_prefix3>")
(set_attr "mode" "<MODE>")])
;; Standard scalar operation patterns which preserve the rest of the
@@ -1846,32 +1818,19 @@
(define_insn "*mul<mode>3<mask_name><round_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(mult:VF
- (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
- (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
- "TARGET_SSE
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
+ (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+ "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
mul<ssemodesuffix>\t{%2, %0|%0, %2}
vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssemul")
- (set_attr "prefix" "<mask_prefix3>")
+ (set_attr "prefix" "<bcst_mask_prefix3>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")])
-(define_insn "*mul<mode>3<mask_name>_bcst"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (mult:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VF_AVX512 2 "register_operand" "v")))]
- "TARGET_AVX512F && <mask_mode512bit_condition>"
- "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "ssemul")
- (set_attr "mode" "<MODE>")])
-
;; Standard scalar operation patterns which preserve the rest of the
;; vector for combiner.
(define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
@@ -1943,26 +1902,14 @@
[(set (match_operand:VF 0 "register_operand" "=x,v")
(div:VF
(match_operand:VF 1 "register_operand" "0,v")
- (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
+ (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
div<ssemodesuffix>\t{%2, %0|%0, %2}
vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssediv")
- (set_attr "prefix" "<mask_prefix3>")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (div:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
- "TARGET_AVX512F && <mask_mode512bit_condition>"
- "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "ssediv")
+ (set_attr "prefix" "<bcst_mask_prefix3>")
(set_attr "mode" "<MODE>")])
(define_insn "<sse>_rcp<mode>2"
@@ -2861,30 +2808,30 @@
DONE;
})
-(define_insn "<mask_codefor>reducep<mode><mask_name>"
+(define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(unspec:VF_AVX512VL
- [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
+ [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_REDUCE))]
"TARGET_AVX512DQ"
- "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "reduces<mode><mask_scalar_name>"
+(define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_REDUCE)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512DQ"
- "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
+ "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -4055,9 +4002,9 @@
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
+ (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -4066,46 +4013,6 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (fma:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "%0")
- (match_operand:VF_AVX512 2 "register_operand" "v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
- (match_operand:VF_AVX512 2 "register_operand" "0,v")
- (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
- vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "0,v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
- (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
- vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -4171,10 +4078,10 @@
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
(neg:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -4183,49 +4090,6 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (fma:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "%0")
- (match_operand:VF_AVX512 2 "register_operand" "v")
- (neg:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
- (match_operand:VF_AVX512 2 "register_operand" "0,v")
- (neg:VF_AVX512
- (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
- vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "0,v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
- (neg:VF_AVX512
- (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
- vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -4294,9 +4158,9 @@
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
(neg:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
+ (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -4305,49 +4169,6 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "%0"))
- (match_operand:VF_AVX512 2 "register_operand" "v")
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
- (match_operand:VF_AVX512 2 "register_operand" "0,v")
- (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
- vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "0,v"))
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
- (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
- vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -4417,10 +4238,10 @@
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
(neg:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
(neg:VF_SF_AVX512VL
- (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -4429,52 +4250,6 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "%0"))
- (match_operand:VF_AVX512 2 "register_operand" "v")
- (neg:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
- (match_operand:VF_AVX512 2 "register_operand" "0,v")
- (neg:VF_AVX512
- (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
- vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
- [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
- (fma:VF_AVX512
- (neg:VF_AVX512
- (match_operand:VF_AVX512 1 "register_operand" "0,v"))
- (vec_duplicate:VF_AVX512
- (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
- (neg:VF_AVX512
- (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
- "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
- "@
- vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
- vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -6374,7 +6149,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "sse2_cvtsd2ss<round_name>"
+(define_insn "sse2_cvtsd2ss<mask_name><round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
@@ -6386,7 +6161,7 @@
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
+ vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %q2<round_mask_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
@@ -6417,7 +6192,7 @@
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "SF")])
-(define_insn "sse2_cvtss2sd<round_saeonly_name>"
+(define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(float_extend:V2DF
@@ -6430,7 +6205,7 @@
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
+ vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %k2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
@@ -11563,8 +11338,8 @@
(define_insn "*<plusminus_insn><mode>3"
[(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
(plusminus:VI_AVX2
- (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
- (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
+ (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11572,31 +11347,7 @@
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "*sub<mode>3_bcst"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (minus:VI48_AVX512VL
- (match_operand:VI48_AVX512VL 1 "register_operand" "v")
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
- "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
- [(set_attr "type" "sseiadd")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "*add<mode>3_bcst"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (plus:VI48_AVX512VL
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
- "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
- [(set_attr "type" "sseiadd")
- (set_attr "prefix" "evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*<plusminus_insn><mode>3_mask"
@@ -12110,24 +11861,13 @@
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
- [(set (match_operand:VI8 0 "register_operand" "=v")
- (mult:VI8
- (match_operand:VI8 1 "register_operand" "v")
- (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512DQ && <mask_mode512bit_condition>"
- "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "type" "sseimul")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "*avx512dq_mul<mode>3<mask_name>_bcst"
[(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
(mult:VI8_AVX512VL
- (vec_duplicate:VI8_AVX512VL
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VI8_AVX512VL 2 "register_operand" "v")))]
- "TARGET_AVX512DQ"
- "vpmullq\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
+ (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
+ (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
+ "TARGET_AVX512DQ && <mask_mode512bit_condition>
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -12157,10 +11897,10 @@
(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
[(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
(mult:VI4_AVX512F
- (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
- (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
- "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
- && <mask_mode512bit_condition>"
+ (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
+ (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmulld\t{%2, %0|%0, %2}
pmulld\t{%2, %0|%0, %2}
@@ -12168,22 +11908,10 @@
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "<mask_prefix4>")
+ (set_attr "prefix" "<bcst_mask_prefix4>")
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*avx512f_mul<mode>3<mask_name>_bcst"
- [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
- (mult:VI4_AVX512VL
- (vec_duplicate:VI4_AVX512VL
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VI4_AVX512VL 2 "register_operand" "v")))]
- "TARGET_AVX512F"
- "vpmulld\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
- [(set_attr "type" "sseimul")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_expand "mul<mode>3"
[(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
(mult:VI8_AVX2_AVX512F
@@ -13210,7 +12938,7 @@
[(set (match_operand:VI 0 "register_operand" "=x,x,v")
(and:VI
(not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
- (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
"TARGET_SSE"
{
char buf[64];
@@ -13309,19 +13037,6 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "*andnot<mode>3_bcst"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (and:VI48_AVX512VL
- (not:VI48_AVX512VL
- (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
- "TARGET_AVX512F"
- "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "*andnot<mode>3_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
@@ -13351,10 +13066,10 @@
(define_insn "<mask_codefor><code><mode>3<mask_name>"
[(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
(any_logic:VI48_AVX_AVX512F
- (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
- (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
+ (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
"TARGET_SSE && <mask_mode512bit_condition>
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
char buf[64];
const char *ops;
@@ -13540,18 +13255,6 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "*<code><mode>3_bcst"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (any_logic:VI48_AVX512VL
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
- (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
- "TARGET_AVX512F && <mask_avx512vl_condition>"
- "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
- [(set_attr "type" "sseiadd")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_mode_iterator VI1248_AVX512VLBW
[(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
(V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
@@ -19092,7 +18795,7 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
+(define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -19101,7 +18804,7 @@
(match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512ER"
- "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
+ "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_opernad3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
[(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "type" "sse")
@@ -19118,7 +18821,7 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
+(define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -19127,7 +18830,7 @@
(match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512ER"
- "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
+ "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
[(set_attr "length_immediate" "1")
(set_attr "type" "sse")
(set_attr "prefix" "evex")
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 58ea9dc..e037a96 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -60,7 +60,9 @@
(define_subst_attr "mask_prefix" "mask" "vex" "evex")
(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex,evex")
+(define_subst_attr "bcst_mask_prefix3" "mask" "orig,maybe_evex" "evex,evex")
(define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex,evex,evex")
+(define_subst_attr "bcst_mask_prefix4" "mask" "orig,orig,maybe_evex" "evex,evex,evex")
(define_subst_attr "mask_expand_op3" "mask" "3" "5")
(define_subst "mask"
@@ -130,9 +132,11 @@
(define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>")
(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
(define_subst_attr "round_constraint" "round" "vm" "v")
+(define_subst_attr "bcst_round_constraint" "round" "vmBr" "v")
(define_subst_attr "round_constraint2" "round" "m" "v")
(define_subst_attr "round_constraint3" "round" "rm" "r")
(define_subst_attr "round_nimm_predicate" "round" "vector_operand" "register_operand")
+(define_subst_attr "bcst_round_nimm_predicate" "round" "bcst_vector_operand" "register_operand")
(define_subst_attr "round_nimm_scalar_predicate" "round" "nonimmediate_operand" "register_operand")
(define_subst_attr "round_prefix" "round" "vex" "evex")
(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode
diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386
index 5134e08..e5fb061 100644
--- a/gcc/config/i386/t-i386
+++ b/gcc/config/i386/t-i386
@@ -17,7 +17,8 @@
# <http://www.gnu.org/licenses/>.
OPTIONS_H_EXTRA += $(srcdir)/config/i386/stringop.def
-TM_H += $(srcdir)/config/i386/x86-tune.def
+TM_H += $(srcdir)/config/i386/x86-tune.def \
+ $(srcdir)/common/config/i386/i386-cpuinfo.h
PASSES_EXTRA += $(srcdir)/config/i386/i386-passes.def
i386-c.o: $(srcdir)/config/i386/i386-c.c
diff --git a/gcc/config/i386/t-rtems b/gcc/config/i386/t-rtems
index 7626970..5f078c6 100644
--- a/gcc/config/i386/t-rtems
+++ b/gcc/config/i386/t-rtems
@@ -17,10 +17,10 @@
# <http://www.gnu.org/licenses/>.
#
-MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro msoft-float
+MULTILIB_OPTIONS = march=i486/march=pentium/march=pentiumpro msoft-float
MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float
MULTILIB_MATCHES = msoft-float=mno-80387
-MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon
+MULTILIB_MATCHES += march?pentium=march?k6 march?pentiumpro=march?athlon
MULTILIB_EXCEPTIONS = \
-mtune=pentium/*msoft-float* \
-mtune=pentiumpro/*msoft-float*
+march=pentium/*msoft-float* \
+march=pentiumpro/*msoft-float*
diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
index c8a9d77..e03bf91 100644
--- a/gcc/config/i386/tbmintrin.h
+++ b/gcc/config/i386/tbmintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <tbmintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _TBMINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h
index 08b76a9..eab36d0 100644
--- a/gcc/config/i386/tsxldtrkintrin.h
+++ b/gcc/config/i386/tsxldtrkintrin.h
@@ -1,5 +1,28 @@
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <tsxldtrkintrin.h> directly; include <immintrin.h> instead."
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <tsxldtrkintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _TSXLDTRKINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
new file mode 100644
index 0000000..991f642
--- /dev/null
+++ b/gcc/config/i386/uintrintrin.h
@@ -0,0 +1,87 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <uintrintrin.h> directly; include <x86gprintrin.h> instead."
+#endif
+
+#ifndef _UINTRNTRIN_H_INCLUDED
+#define _UINTRNTRIN_H_INCLUDED
+
+#ifdef __x86_64__
+
+#ifndef __UINTR__
+#pragma GCC push_options
+#pragma GCC target ("uintr")
+#define __DISABLE_UINTR__
+#endif /* __UINTR__ */
+
+struct __uintr_frame
+{
+ /* The position of the most significant bit set in user-interrupt
+ request register. */
+ unsigned long long uirrv;
+ /* RIP of the interrupted user process. */
+ unsigned long long rip;
+ /* RFLAGS of the interrupted user process. */
+ unsigned long long rflags;
+ /* RSP of the interrupted user process. */
+ unsigned long long rsp;
+};
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_clui (void)
+{
+ __builtin_ia32_clui ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_stui (void)
+{
+ __builtin_ia32_stui ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_senduipi (unsigned long long __R)
+{
+ __builtin_ia32_senduipi (__R);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_testui (void)
+{
+ return __builtin_ia32_testui ();
+}
+
+#ifdef __DISABLE_UINTR__
+#undef __DISABLE_UINTR__
+#pragma GCC pop_options
+#endif /* __DISABLE_UINTR__ */
+
+#endif
+
+#endif /* _UINTRNTRIN_H_INCLUDED. */
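
For context, a minimal sketch of how these user-interrupt helpers fit together in C. It assumes an x86-64 compiler invoked with -muintr; actually receiving user interrupts also needs OS support, and the UITT index passed to _senduipi below is illustrative:

#include <x86gprintrin.h>

void
critical_section (void)
{
  _clui ();	/* Clear UIF: block delivery of user interrupts.  */
  /* ... work that must not be interrupted ... */
  _stui ();	/* Set UIF: allow user interrupts again.  */

  if (_testui ())	/* Returns the current value of UIF.  */
    _senduipi (0);	/* Send a user IPI via UITT entry 0 (illustrative).  */
}
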
diff --git a/gcc/config/i386/waitpkgintrin.h b/gcc/config/i386/waitpkgintrin.h
index 5dbcde3..5046c98 100644
--- a/gcc/config/i386/waitpkgintrin.h
+++ b/gcc/config/i386/waitpkgintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <waitpkgintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <waitpkgintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _WAITPKG_H_INCLUDED
diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h
index 5393698..7089e61 100644
--- a/gcc/config/i386/wbnoinvdintrin.h
+++ b/gcc/config/i386/wbnoinvdintrin.h
@@ -1,5 +1,28 @@
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <wbnoinvdintrin.h> directly; include <immintrin.h> instead."
+/* Copyright (C) 2018-2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <wbnoinvdintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _WBNOINVDINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
new file mode 100644
index 0000000..ffe07e4
--- /dev/null
+++ b/gcc/config/i386/x86gprintrin.h
@@ -0,0 +1,256 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86GPRINTRIN_H_INCLUDED
+#define _X86GPRINTRIN_H_INCLUDED
+
+#include <ia32intrin.h>
+
+#ifndef __iamcu__
+
+#include <stddef.h>
+
+#include <adxintrin.h>
+
+#include <bmiintrin.h>
+
+#include <bmi2intrin.h>
+
+#include <cetintrin.h>
+
+#include <cldemoteintrin.h>
+
+#include <clflushoptintrin.h>
+
+#include <clwbintrin.h>
+
+#include <clzerointrin.h>
+
+#include <enqcmdintrin.h>
+
+#include <fxsrintrin.h>
+
+#include <lzcntintrin.h>
+
+#include <lwpintrin.h>
+
+#include <movdirintrin.h>
+
+#include <mwaitxintrin.h>
+
+#include <pconfigintrin.h>
+
+#include <popcntintrin.h>
+
+#include <pkuintrin.h>
+
+#include <rdseedintrin.h>
+
+#include <rtmintrin.h>
+
+#include <serializeintrin.h>
+
+#include <sgxintrin.h>
+
+#include <tbmintrin.h>
+
+#include <tsxldtrkintrin.h>
+
+#include <uintrintrin.h>
+
+#include <waitpkgintrin.h>
+
+#include <wbnoinvdintrin.h>
+
+#include <xsaveintrin.h>
+
+#include <xsavecintrin.h>
+
+#include <xsaveoptintrin.h>
+
+#include <xsavesintrin.h>
+
+#include <xtestintrin.h>
+
+#include <hresetintrin.h>
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_wbinvd (void)
+{
+ __builtin_ia32_wbinvd ();
+}
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand16_step (unsigned short *__P)
+{
+ return __builtin_ia32_rdrand16_step (__P);
+}
+
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand32_step (unsigned int *__P)
+{
+ return __builtin_ia32_rdrand32_step (__P);
+}
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
+
+#ifndef __RDPID__
+#pragma GCC push_options
+#pragma GCC target("rdpid")
+#define __DISABLE_RDPID__
+#endif /* __RDPID__ */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdpid_u32 (void)
+{
+ return __builtin_ia32_rdpid ();
+}
+#ifdef __DISABLE_RDPID__
+#undef __DISABLE_RDPID__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDPID__ */
+
+#ifdef __x86_64__
+
+#ifndef __FSGSBASE__
+#pragma GCC push_options
+#pragma GCC target("fsgsbase")
+#define __DISABLE_FSGSBASE__
+#endif /* __FSGSBASE__ */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u32 (void)
+{
+ return __builtin_ia32_rdfsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u64 (void)
+{
+ return __builtin_ia32_rdfsbase64 ();
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u32 (void)
+{
+ return __builtin_ia32_rdgsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u64 (void)
+{
+ return __builtin_ia32_rdgsbase64 ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrfsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrfsbase64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrgsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrgsbase64 (__B);
+}
+#ifdef __DISABLE_FSGSBASE__
+#undef __DISABLE_FSGSBASE__
+#pragma GCC pop_options
+#endif /* __DISABLE_FSGSBASE__ */
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand64_step (unsigned long long *__P)
+{
+ return __builtin_ia32_rdrand64_step (__P);
+}
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
+
+#endif /* __x86_64__ */
+
+#ifndef __PTWRITE__
+#pragma GCC push_options
+#pragma GCC target("ptwrite")
+#define __DISABLE_PTWRITE__
+#endif
+
+#ifdef __x86_64__
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite64 (unsigned long long __B)
+{
+ __builtin_ia32_ptwrite64 (__B);
+}
+#endif /* __x86_64__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite32 (unsigned __B)
+{
+ __builtin_ia32_ptwrite32 (__B);
+}
+#ifdef __DISABLE_PTWRITE__
+#undef __DISABLE_PTWRITE__
+#pragma GCC pop_options
+#endif /* __DISABLE_PTWRITE__ */
+
+#endif /* __iamcu__ */
+
+#endif /* _X86GPRINTRIN_H_INCLUDED. */
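
The point of the new umbrella header is that general-purpose-register intrinsics no longer drag in the vector headers through <immintrin.h>. A small sketch, assuming a target with RDRAND (e.g. compiled with -mrdrnd):

#include <x86gprintrin.h>

/* Stores a hardware random value in *out and returns 1 on success;
   returns 0 if the hardware had no entropy ready.  */
int
get_random (unsigned int *out)
{
  return _rdrand32_step (out);
}
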
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 59fdceb..bc6cb40 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -24,7 +24,7 @@
#ifndef _X86INTRIN_H_INCLUDED
#define _X86INTRIN_H_INCLUDED
-#include <ia32intrin.h>
+#include <x86gprintrin.h>
#ifndef __iamcu__
@@ -37,16 +37,6 @@
#include <xopintrin.h>
-#include <lwpintrin.h>
-
-#include <tbmintrin.h>
-
-#include <popcntintrin.h>
-
-#include <mwaitxintrin.h>
-
-#include <clzerointrin.h>
-
#endif /* __iamcu__ */
#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xsavecintrin.h b/gcc/config/i386/xsavecintrin.h
index 039e215..06c9f36 100644
--- a/gcc/config/i386/xsavecintrin.h
+++ b/gcc/config/i386/xsavecintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <xsavecintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <xsavecintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _XSAVECINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h
index 9f0b8bb..f9cac0d 100644
--- a/gcc/config/i386/xsaveintrin.h
+++ b/gcc/config/i386/xsaveintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <xsaveintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <xsaveintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _XSAVEINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h
index 9da3297..4f2756b 100644
--- a/gcc/config/i386/xsaveoptintrin.h
+++ b/gcc/config/i386/xsaveoptintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <xsaveoptintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _XSAVEOPTINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/xsavesintrin.h b/gcc/config/i386/xsavesintrin.h
index 264f1c4..629a1f3 100644
--- a/gcc/config/i386/xsavesintrin.h
+++ b/gcc/config/i386/xsavesintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <xsavesintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <xsavesintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _XSAVESINTRIN_H_INCLUDED
diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h
index cb187e4..757cc34 100644
--- a/gcc/config/i386/xtestintrin.h
+++ b/gcc/config/i386/xtestintrin.h
@@ -21,8 +21,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <xtestintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _XTESTINTRIN_H_INCLUDED
diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h
index 3759187..c52778b 100644
--- a/gcc/config/linux-protos.h
+++ b/gcc/config/linux-protos.h
@@ -19,4 +19,4 @@ along with GCC; see the file COPYING3. If not see
extern bool linux_has_ifunc_p (void);
-extern bool linux_libc_has_function (enum function_class fn_class);
+extern bool linux_libc_has_function (enum function_class fn_class, tree);
diff --git a/gcc/config/linux.c b/gcc/config/linux.c
index 9876153..83ffff4 100644
--- a/gcc/config/linux.c
+++ b/gcc/config/linux.c
@@ -25,7 +25,8 @@ along with GCC; see the file COPYING3. If not see
#include "linux-protos.h"
bool
-linux_libc_has_function (enum function_class fn_class)
+linux_libc_has_function (enum function_class fn_class,
+ tree type ATTRIBUTE_UNUSED)
{
if (OPTION_GLIBC || OPTION_MUSL)
return true;
diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index f70e61b..ad244bb 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -1346,12 +1346,12 @@
;; instructions, so we provide a pattern to support it here.
(define_insn "andneghi3"
[(set (match_operand:HI 0 "register_operand" "=r")
- (and:HI (neg:HI (match_operand:HI 1 "register_operand" "r"))
+ (and:HI (neg:HI (match_operand:HI 1 "general_operand" "rm"))
(match_operand 2 "immediate_operand" "n")))]
""
"*
if (REGNO (operands[0]) != REGNO (operands[1]))
- return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
+ return \"MOV%X1.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
else
return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
"
diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c
index 4fecb2b..a3c4099 100644
--- a/gcc/config/nvptx/mkoffload.c
+++ b/gcc/config/nvptx/mkoffload.c
@@ -399,7 +399,8 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
obstack_ptr_grow (&argv_obstack, NULL);
const char **new_argv = XOBFINISH (&argv_obstack, const char **);
- fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+ fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
+ ".gccnative_args");
obstack_free (&argv_obstack, NULL);
}
@@ -582,7 +583,8 @@ main (int argc, char **argv)
unsetenv ("COMPILER_PATH");
unsetenv ("LIBRARY_PATH");
- fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+ fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
+ ".gcc_args");
obstack_free (&argv_obstack, NULL);
xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
@@ -594,6 +596,7 @@ main (int argc, char **argv)
fatal_error (input_location, "cannot open intermediate ptx file");
process (in, out);
+ fclose (in);
}
fclose (out);
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 0c590d8..1734947 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -368,6 +368,22 @@ nvptx_name_replacement (const char *name)
return name;
}
+/* Return NULL if NAME contains no dot. Otherwise return a copy of NAME
+ with the dots replaced with dollar signs. */
+
+static char *
+nvptx_replace_dot (const char *name)
+{
+ if (strchr (name, '.') == NULL)
+ return NULL;
+
+ char *p = xstrdup (name);
+ for (size_t i = 0; i < strlen (p); ++i)
+ if (p[i] == '.')
+ p[i] = '$';
+ return p;
+}
+
/* If MODE should be treated as two registers of an inner mode, return
that inner mode. Otherwise return VOIDmode. */
@@ -827,26 +843,12 @@ write_var_marker (FILE *file, bool is_defn, bool globalize, const char *name)
fputs ("\n", file);
}
-/* Write a .func or .kernel declaration or definition along with
- a helper comment for use by ld. S is the stream to write to, DECL
- the decl for the function with name NAME. For definitions, emit
- a declaration too. */
+/* Helper function for write_fn_proto. */
-static const char *
-write_fn_proto (std::stringstream &s, bool is_defn,
- const char *name, const_tree decl)
+static void
+write_fn_proto_1 (std::stringstream &s, bool is_defn,
+ const char *name, const_tree decl)
{
- if (is_defn)
- /* Emit a declaration. The PTX assembler gets upset without it. */
- name = write_fn_proto (s, false, name, decl);
- else
- {
- /* Avoid repeating the name replacement. */
- name = nvptx_name_replacement (name);
- if (name[0] == '*')
- name++;
- }
-
write_fn_marker (s, is_defn, TREE_PUBLIC (decl), name);
/* PTX declaration. */
@@ -929,8 +931,38 @@ write_fn_proto (std::stringstream &s, bool is_defn,
s << ")";
s << (is_defn ? "\n" : ";\n");
+}
- return name;
+/* Write a .func or .kernel declaration or definition along with
+ a helper comment for use by ld. S is the stream to write to, DECL
+ the decl for the function with name NAME. For definitions, emit
+ a declaration too. */
+
+static void
+write_fn_proto (std::stringstream &s, bool is_defn,
+ const char *name, const_tree decl)
+{
+ const char *replacement = nvptx_name_replacement (name);
+ char *replaced_dots = NULL;
+ if (replacement != name)
+ name = replacement;
+ else
+ {
+ replaced_dots = nvptx_replace_dot (name);
+ if (replaced_dots)
+ name = replaced_dots;
+ }
+ if (name[0] == '*')
+ name++;
+
+ if (is_defn)
+ /* Emit a declaration. The PTX assembler gets upset without it. */
+ write_fn_proto_1 (s, false, name, decl);
+
+ write_fn_proto_1 (s, is_defn, name, decl);
+
+ if (replaced_dots)
+ XDELETE (replaced_dots);
}
/* Construct a function declaration from a call insn. This can be
@@ -942,6 +974,8 @@ static void
write_fn_proto_from_insn (std::stringstream &s, const char *name,
rtx result, rtx pat)
{
+ char *replaced_dots = NULL;
+
if (!name)
{
s << "\t.callprototype ";
@@ -949,7 +983,15 @@ write_fn_proto_from_insn (std::stringstream &s, const char *name,
}
else
{
- name = nvptx_name_replacement (name);
+ const char *replacement = nvptx_name_replacement (name);
+ if (replacement != name)
+ name = replacement;
+ else
+ {
+ replaced_dots = nvptx_replace_dot (name);
+ if (replaced_dots)
+ name = replaced_dots;
+ }
write_fn_marker (s, false, true, name);
s << "\t.extern .func ";
}
@@ -958,6 +1000,8 @@ write_fn_proto_from_insn (std::stringstream &s, const char *name,
write_return_mode (s, true, GET_MODE (result));
s << name;
+ if (replaced_dots)
+ XDELETE (replaced_dots);
int arg_end = XVECLEN (pat, 0);
for (int i = 1; i < arg_end; i++)
@@ -2101,7 +2145,7 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
val = INTVAL (XEXP (x, 1));
x = XEXP (x, 0);
gcc_assert (GET_CODE (x) == SYMBOL_REF);
- /* FALLTHROUGH */
+ gcc_fallthrough (); /* FALLTHROUGH */
case SYMBOL_REF:
gcc_assert (size == init_frag.size);
@@ -2349,6 +2393,7 @@ const char *
nvptx_output_mov_insn (rtx dst, rtx src)
{
machine_mode dst_mode = GET_MODE (dst);
+ machine_mode src_mode = GET_MODE (src);
machine_mode dst_inner = (GET_CODE (dst) == SUBREG
? GET_MODE (XEXP (dst, 0)) : dst_mode);
machine_mode src_inner = (GET_CODE (src) == SUBREG
@@ -2375,7 +2420,7 @@ nvptx_output_mov_insn (rtx dst, rtx src)
if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner))
{
if (GET_MODE_BITSIZE (dst_mode) == 128
- && GET_MODE_BITSIZE (GET_MODE (src)) == 128)
+ && GET_MODE_BITSIZE (src_mode) == 128)
{
/* mov.b128 is not supported. */
if (dst_inner == V2DImode && src_inner == TImode)
@@ -2388,6 +2433,10 @@ nvptx_output_mov_insn (rtx dst, rtx src)
return "%.\tmov.b%T0\t%0, %1;";
}
+ if (GET_MODE_BITSIZE (src_inner) == 128
+ && GET_MODE_BITSIZE (src_mode) == 64)
+ return "%.\tmov.b%T0\t%0, %1;";
+
return "%.\tcvt%t0%t1\t%0, %1;";
}
@@ -2458,9 +2507,20 @@ nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
if (decl)
{
+ char *replaced_dots = NULL;
const char *name = get_fnname_from_decl (decl);
- name = nvptx_name_replacement (name);
+ const char *replacement = nvptx_name_replacement (name);
+ if (replacement != name)
+ name = replacement;
+ else
+ {
+ replaced_dots = nvptx_replace_dot (name);
+ if (replaced_dots)
+ name = replaced_dots;
+ }
assemble_name (asm_out_file, name);
+ if (replaced_dots)
+ XDELETE (replaced_dots);
}
else
output_address (VOIDmode, callee);
@@ -2598,7 +2658,7 @@ nvptx_print_operand (FILE *file, rtx x, int code)
{
case 'A':
x = XEXP (x, 0);
- /* FALLTHROUGH. */
+ gcc_fallthrough (); /* FALLTHROUGH. */
case 'D':
if (GET_CODE (x) == CONST)
@@ -6531,6 +6591,23 @@ nvptx_set_current_function (tree fndecl)
oacc_bcast_partition = 0;
}
+/* Implement TARGET_LIBC_HAS_FUNCTION. */
+
+bool
+nvptx_libc_has_function (enum function_class fn_class, tree type)
+{
+ if (fn_class == function_sincos)
+ {
+ if (type != NULL_TREE)
+ /* Currently, newlib does not support sincosl. */
+ return type == float_type_node || type == double_type_node;
+ else
+ return true;
+ }
+
+ return default_libc_has_function (fn_class, type);
+}
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -6676,6 +6753,9 @@ nvptx_set_current_function (tree fndecl)
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
+#undef TARGET_LIBC_HAS_FUNCTION
+#define TARGET_LIBC_HAS_FUNCTION nvptx_libc_has_function
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-nvptx.h"
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 6ebcc76..17fe157 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -29,7 +29,10 @@
#define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
-#define ASM_SPEC "%{misa=*:-m %*}"
+/* Default needs to be in sync with default for misa in nvptx.opt.
+ We add a default here to work around a hard-coded sm_30 default in
+ nvptx-as. */
+#define ASM_SPEC "%{misa=*:-m %*; :-m sm_35}"
#define TARGET_CPU_CPP_BUILTINS() \
do \
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 6178e6a..ccbcd09 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -146,6 +146,13 @@
return true;
})
+;; Test for a function symbol ref operand
+(define_predicate "symbol_ref_function_operand"
+ (match_code "symbol_ref")
+{
+ return SYMBOL_REF_FUNCTION_P (op);
+})
+
(define_attr "predicable" "false,true"
(const_string "true"))
@@ -241,6 +248,17 @@
}
[(set_attr "subregs_ok" "true")])
+;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
+(define_split
+ [(set (match_operand:DI 0 "nvptx_register_operand")
+ (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
+ (match_operand 2 "const_int_operand"))))]
+ ""
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
+ ]
+ "")
+
(define_insn "*mov<mode>_insn"
[(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
@@ -365,9 +383,13 @@
[(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
(truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
""
- "@
- %.\\tcvt%t0.u32\\t%0, %1;
- %.\\tst%A0.u%T0\\t%0, %1;"
+ {
+ if (which_alternative == 1)
+ return "%.\\tst%A0.u%T0\\t%0, %1;";
+ if (GET_MODE (operands[0]) == QImode)
+ return "%.\\tmov%t0\\t%0, %1;";
+ return "%.\\tcvt%t0.u32\\t%0, %1;";
+ }
[(set_attr "subregs_ok" "true")])
(define_insn "truncdi<mode>2"
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 75c3d54..045e354 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -17,9 +17,11 @@
; along with GCC; see the file COPYING3. If not see
; <http://www.gnu.org/licenses/>.
-m32
-Target Report RejectNegative InverseMask(ABI64)
-Generate code for a 32-bit ABI.
+; It's not clear whether this was ever built/tested/used, so it is no longer
+; exposed to the user.
+;m32
+;Target Report RejectNegative InverseMask(ABI64)
+;Generate code for a 32-bit ABI.
m64
Target Report RejectNegative Mask(ABI64)
@@ -37,7 +39,7 @@ msoft-stack
Target Report Mask(SOFT_STACK)
Use custom stacks instead of local memory for automatic storage.
-msoft-stack-reserve-local
+msoft-stack-reserve-local=
Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128)
Specify size of .local memory used for stack when the exact amount is not known.
@@ -59,6 +61,7 @@ Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30)
EnumValue
Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35)
+; Default needs to be in sync with default in ASM_SPEC in nvptx.h.
misa=
-Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM30)
+Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35)
Specify the version of the ptx ISA to use.
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
index 794bf8e..2820720 100644
--- a/gcc/config/pa/pa-hpux11.h
+++ b/gcc/config/pa/pa-hpux11.h
@@ -154,11 +154,6 @@ along with GCC; see the file COPYING3. If not see
%{!mt:%{!pthread:-a shared -lc -a archive}}}}\
%{shared:%{mt|pthread:-lpthread}}"
-/* The libgcc_stub.a library needs to come last. */
-#undef LINK_GCC_C_SEQUENCE_SPEC
-#define LINK_GCC_C_SEQUENCE_SPEC \
- "%G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}}}"
-
#undef STARTFILE_SPEC
#define STARTFILE_SPEC \
"%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \
diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h
index f271bbf..970722a 100644
--- a/gcc/config/pa/pa32-linux.h
+++ b/gcc/config/pa/pa32-linux.h
@@ -57,6 +57,11 @@ call_ ## FUNC (void) \
}
#endif
+/* We need to link against libgcc.a for __canonicalize_funcptr_for_compare
+ and $$dyncall. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC "libgcc.a%s"
+
#undef WCHAR_TYPE
#define WCHAR_TYPE "long int"
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index c7d127f7..096aa4b 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -103,12 +103,6 @@ along with GCC; see the file COPYING3. If not see
%{shared:%{mt|pthread:-lpthread}}"
#endif
-/* The libgcc_stub.a and milli.a libraries need to come last. */
-#undef LINK_GCC_C_SEQUENCE_SPEC
-#define LINK_GCC_C_SEQUENCE_SPEC "\
- %G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}\
- milli.a%s}}"
-
/* Under hpux11, the normal location of the `ld' and `as' programs is the
/usr/ccs/bin directory. */
@@ -335,8 +329,12 @@ do { \
%{static:crtbeginT%O%s} %{!static:%{!shared:crtbegin%O%s} \
%{shared:crtbeginS%O%s}}"
#endif
+
+/* The libgcc_stub.a and milli.a libraries must come last. We need
+ to link with these libraries whenever start files are needed. */
#undef ENDFILE_SPEC
-#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}"
+#define ENDFILE_SPEC \
+ "%{!shared:crtend%O%s libgcc_stub.a%s} %{shared:crtendS%O%s} milli.a%s"
/* Since HP uses the .init and .fini sections for array initializers
and finalizers, we need different defines for INIT_SECTION_ASM_OP
diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator
index 8f4df18..57ee7c3 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -22,14 +22,26 @@
# Each argument to this script is of the form
# <primary arch>-<abi>-<additional arches>-<extensions>
-# For example,
+# Example 1:
# rv32imafd-ilp32d-rv32g-c,v
# means that, in addition to rv32imafd, these configurations can also use the
# rv32imafd-ilp32d libraries: rv32imafdc, rv32imafdv, rv32g, rv32gc, rv32gv
+#
+# Example 2:
+# rv32imafd-ilp32d--c*b
+# means that, in addition to rv32imafd, these configurations can also use the
+# rv32imafd-ilp32d libraries: rv32imafdc-ilp32d, rv32imafdb-ilp32d,
+# rv32imafdcb-ilp32d
from __future__ import print_function
import sys
import collections
+import itertools
+from functools import reduce
+
+#
+# TODO: Add test for this script.
+#
arches = collections.OrderedDict()
abis = collections.OrderedDict()
@@ -37,37 +49,53 @@ required = []
reuse = []
canonical_order = "mafdgqlcbjtpvn"
+LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
+
+#
+# IMPLIED_EXT(ext) -> implied extension list.
+#
+IMPLIED_EXT = {
+ "d" : ["f"],
+}
def arch_canonicalize(arch):
- # TODO: Support implied extensions, e.g. D implied F in latest spec.
# TODO: Support extension version.
new_arch = ""
if arch[:5] in ['rv32e', 'rv32i', 'rv32g', 'rv64i', 'rv64g']:
- # TODO: We should expand g to imadzifencei once we support newer spec.
+    # TODO: We should expand g to imafd_zifencei once we support newer spec.
new_arch = arch[:5].replace("g", "imafd")
else:
raise Exception("Unexpected arch: `%s`" % arch[:5])
# Find any Z, S, H or X
- long_ext_prefixes = ['z', 's', 'h', 'x']
- long_ext_prefixes_idx = map(lambda x: arch.find(x), long_ext_prefixes)
+ long_ext_prefixes_idx = map(lambda x: arch.find(x), LONG_EXT_PREFIXES)
# Filter out any non-existent index.
long_ext_prefixes_idx = list(filter(lambda x: x != -1, long_ext_prefixes_idx))
if long_ext_prefixes_idx:
first_long_ext_idx = min(long_ext_prefixes_idx)
long_exts = arch[first_long_ext_idx:].split("_")
- std_exts = arch[5:first_long_ext_idx]
+ std_exts = list(arch[5:first_long_ext_idx])
else:
long_exts = []
- std_exts = arch[5:]
+ std_exts = list(arch[5:])
+
+ #
+ # Handle implied extensions.
+ #
+ for ext in std_exts + long_exts:
+ if ext in IMPLIED_EXT:
+ implied_exts = IMPLIED_EXT[ext]
+ for implied_ext in implied_exts:
+ if implied_ext not in std_exts + long_exts:
+ long_exts.append(implied_ext)
  # A single-letter extension might appear in the long_exts list,
  # because we just append the extensions list to the arch string.
- std_exts += "".join(filter(lambda x:len(x) == 1, long_exts))
+ std_exts += list(filter(lambda x:len(x) == 1, long_exts))
# Multi-letter extension must be in lexicographic order.
- long_exts = sorted(filter(lambda x:len(x) != 1, long_exts))
+ long_exts = list(sorted(filter(lambda x:len(x) != 1, long_exts)))
# Put extensions in canonical order.
for ext in canonical_order:
@@ -86,15 +114,98 @@ def arch_canonicalize(arch):
new_arch += "_" + "_".join(long_exts)
return new_arch
+#
+# add underline for each multi-char extensions.
+# e.g. ["a", "zfh"] -> ["a", "_zfh"]
+#
+def add_underline_prefix(ext):
+ for long_ext_prefix in LONG_EXT_PREFIXES:
+ if ext.startswith(long_ext_prefix):
+ return "_" + ext
+
+ return ext
+
+#
+# Handle expansion operation.
+#
+# e.g. "a*b" -> [("a",), ("b",), ("a", "b")]
+# "a" -> [("a",)]
+#
+def _expand_combination(ext):
+ exts = list(ext.split("*"))
+
+ # No need to expand if there is no `*`.
+ if len(exts) == 1:
+ return [(exts[0],)]
+
+  # Add an underscore prefix to every extension.
+ # e.g.
+ # _b * zvamo => _b * _zvamo
+ exts = list(map(lambda x: '_' + x, exts))
+
+ # Generate combination!
+ ext_combs = []
+ for comb_len in range(1, len(exts)+1):
+ for ext_comb in itertools.combinations(exts, comb_len):
+ ext_combs.append(ext_comb)
+
+ return ext_combs
+
+#
+# Take a list and drop duplicated entries.
+# e.g.
+# ["a", "b", "ab", "a"] -> ["a", "b", "ab"]
+#
+def unique(x):
+ #
+  # Drop duplicated entries: convert the list to a set and then back
+  # to a list.
+  #
+  # Sort the result so the output is deterministic across environments.
+ #
+ return list(sorted(list(set(x))))
+
+#
+# Expand EXT string if there is any expansion operator (*).
+# e.g.
+# "a*b,c" -> ["a", "b", "ab", "c"]
+#
+def expand_combination(ext):
+ ext = list(filter(None, ext.split(',')))
+
+ # Expand combination for EXT, got lots of list.
+ # e.g.
+ # a * b => [[("a",), ("b",)], [("a", "b")]]
+ ext_combs = list(map(_expand_combination, ext))
+
+ # Then fold to single list.
+ # e.g.
+ # [[("a",), ("b",)], [("a", "b")]] => [("a",), ("b",), ("a", "b")]
+ ext = list(reduce(lambda x, y: x + y, ext_combs, []))
+
+ # Fold the tuple to string.
+ # e.g.
+ # [("a",), ("b",), ("a", "b")] => ["a", "b", "ab"]
+ ext = map(lambda e : reduce(lambda x, y: x + y, e), ext)
+
+  # Drop duplicated entries.
+ ext = unique(ext)
+
+ return ext
+
for cfg in sys.argv[1:]:
(arch, abi, extra, ext) = cfg.split('-')
arch = arch_canonicalize (arch)
arches[arch] = 1
abis[abi] = 1
extra = list(filter(None, extra.split(',')))
- ext = list(filter(None, ext.split(',')))
- alts = sum([[x] + [x + "_" + y for y in ext] for x in [arch] + extra], [])
+ ext_combs = expand_combination(ext)
+ alts = sum([[x] + [x + y for y in ext_combs] for x in [arch] + extra], [])
alts = list(map(arch_canonicalize, alts))
+
+  # Drop duplicated entries.
+ alts = unique(alts)
+
for alt in alts[1:]:
arches[alt] = 1
reuse.append('march.%s/mabi.%s=march.%s/mabi.%s' % (arch, abi, alt, abi))
diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index 735f2f2..c600badb 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -90,12 +90,15 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
builtin_define ("__riscv_cmodel_medlow");
break;
+ case CM_PIC:
+      /* __riscv_cmodel_pic is deprecated and will be removed in the next
+	 GCC release; see https://github.com/riscv/riscv-c-api-doc/pull/11.  */
+ builtin_define ("__riscv_cmodel_pic");
+ /* FALLTHROUGH. */
+
case CM_MEDANY:
builtin_define ("__riscv_cmodel_medany");
break;
- case CM_PIC:
- builtin_define ("__riscv_cmodel_pic");
- break;
}
}
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
new file mode 100644
index 0000000..6a13f3e
--- /dev/null
+++ b/gcc/config/riscv/riscv-cores.def
@@ -0,0 +1,49 @@
+/* List of supported core and tune info for RISC-V.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is a list of cores that implement RISC-V.
+
+ Before using #include to read this file, define a macro:
+
+      RISCV_CORE(CORE_NAME, ARCH, TUNE_INFO)
+
+   The CORE_NAME is the name of the core, represented as a string.
+   The ARCH is the default arch of the core, represented as a string;
+   it can be NULL if the core has no default arch.
+   The TUNE_INFO is the name of the micro-arch this core schedules and
+   tunes for, represented as a string; it is matched against the
+   micro-arch names known to riscv.c.  */
+
+RISCV_CORE("sifive-e20", "rv32imc", "rocket")
+RISCV_CORE("sifive-e21", "rv32imac", "rocket")
+RISCV_CORE("sifive-e24", "rv32imafc", "rocket")
+RISCV_CORE("sifive-e31", "rv32imac", "sifive-3-series")
+RISCV_CORE("sifive-e34", "rv32imafc", "sifive-3-series")
+RISCV_CORE("sifive-e76", "rv32imafc", "sifive-7-series")
+
+RISCV_CORE("sifive-s21", "rv64imac", "rocket")
+RISCV_CORE("sifive-s51", "rv64imac", "sifive-5-series")
+RISCV_CORE("sifive-s54", "rv64imafdc", "sifive-5-series")
+RISCV_CORE("sifive-s76", "rv64imafdc", "sifive-7-series")
+
+RISCV_CORE("sifive-u54", "rv64imafdc", "sifive-5-series")
+RISCV_CORE("sifive-u74", "rv64imafdc", "sifive-7-series")
+
+#undef RISCV_CORE
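
The .def file is consumed X-macro style: a consumer defines RISCV_CORE, includes the file, and the file #undefs the macro at its end. A minimal sketch of how riscv.c can build its CPU table from it (the array name is illustrative; the struct is riscv_cpu_info from riscv-protos.h):

/* Expand each RISCV_CORE entry into one initializer of the table.  */
static const riscv_cpu_info riscv_cpu_tables[] = {
#define RISCV_CORE(CORE_NAME, ARCH, TUNE_INFO) \
  { CORE_NAME, ARCH, TUNE_INFO },
#include "riscv-cores.def"
};
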
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 358224a..256dab1 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -94,4 +94,18 @@ extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
+/* Information about one CPU we know about. */
+struct riscv_cpu_info {
+ /* This CPU's canonical name. */
+ const char *name;
+
+ /* Default arch for this CPU, could be NULL if no default arch. */
+ const char *arch;
+
+ /* Which automaton to use for tuning. */
+ const char *tune;
+};
+
+extern const riscv_cpu_info *riscv_find_cpu (const char *);
+
#endif /* ! GCC_RISCV_PROTOS_H */
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 63b0c38..989a9f1 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -209,7 +209,7 @@ struct riscv_integer_op {
/* Costs of various operations on the different architectures. */
-struct riscv_tune_info
+struct riscv_tune_param
{
unsigned short fp_add[2];
unsigned short fp_mul[2];
@@ -222,16 +222,16 @@ struct riscv_tune_info
bool slow_unaligned_access;
};
-/* Information about one CPU we know about. */
-struct riscv_cpu_info {
- /* This CPU's canonical name. */
+/* Information about one micro-arch we know about. */
+struct riscv_tune_info {
+  /* This micro-arch's canonical name.  */
const char *name;
/* Which automaton to use for tuning. */
enum riscv_microarchitecture_type microarchitecture;
- /* Tuning parameters for this CPU. */
- const struct riscv_tune_info *tune_info;
+ /* Tuning parameters for this micro-arch. */
+ const struct riscv_tune_param *tune_param;
};
/* Global variables for machine-dependent things. */
@@ -248,7 +248,7 @@ unsigned riscv_stack_boundary;
static int epilogue_cfa_sp_offset;
/* Which tuning parameters to use. */
-static const struct riscv_tune_info *tune_info;
+static const struct riscv_tune_param *tune_param;
/* Which automaton to use for tuning. */
enum riscv_microarchitecture_type riscv_microarchitecture;
@@ -275,7 +275,7 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
};
/* Costs to use when optimizing for rocket. */
-static const struct riscv_tune_info rocket_tune_info = {
+static const struct riscv_tune_param rocket_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
{COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
@@ -288,7 +288,7 @@ static const struct riscv_tune_info rocket_tune_info = {
};
/* Costs to use when optimizing for Sifive 7 Series. */
-static const struct riscv_tune_info sifive_7_tune_info = {
+static const struct riscv_tune_param sifive_7_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
{COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
@@ -301,7 +301,7 @@ static const struct riscv_tune_info sifive_7_tune_info = {
};
/* Costs to use when optimizing for size. */
-static const struct riscv_tune_info optimize_size_tune_info = {
+static const struct riscv_tune_param optimize_size_tune_info = {
{COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
{COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_mul */
{COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_div */
@@ -343,7 +343,7 @@ static const unsigned gpr_save_reg_order[] = {
};
/* A table describing all the processors GCC knows about. */
-static const struct riscv_cpu_info riscv_cpu_info_table[] = {
+static const struct riscv_tune_info riscv_tune_info_table[] = {
{ "rocket", generic, &rocket_tune_info },
{ "sifive-3-series", generic, &rocket_tune_info },
{ "sifive-5-series", generic, &rocket_tune_info },
@@ -351,17 +351,22 @@ static const struct riscv_cpu_info riscv_cpu_info_table[] = {
{ "size", generic, &optimize_size_tune_info },
};
-/* Return the riscv_cpu_info entry for the given name string. */
+/* Return the riscv_tune_info entry for the given name string. */
-static const struct riscv_cpu_info *
-riscv_parse_cpu (const char *cpu_string)
+static const struct riscv_tune_info *
+riscv_parse_tune (const char *tune_string)
{
- for (unsigned i = 0; i < ARRAY_SIZE (riscv_cpu_info_table); i++)
- if (strcmp (riscv_cpu_info_table[i].name, cpu_string) == 0)
- return riscv_cpu_info_table + i;
+ const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);
- error ("unknown cpu %qs for %<-mtune%>", cpu_string);
- return riscv_cpu_info_table;
+ if (cpu)
+ tune_string = cpu->tune;
+
+ for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
+ if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
+ return riscv_tune_info_table + i;
+
+ error ("unknown cpu %qs for %<-mtune%>", tune_string);
+ return riscv_tune_info_table;
}
/* Helper function for riscv_build_integer; arguments are as for
@@ -1703,7 +1708,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
instructions it needs. */
if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
{
- *total = COSTS_N_INSNS (cost + tune_info->memory_cost);
+ *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
return true;
}
/* Otherwise use the default handling. */
@@ -1770,7 +1775,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
mode instead. */
mode = GET_MODE (XEXP (x, 0));
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = tune_param->fp_add[mode == DFmode];
else
*total = riscv_binary_cost (x, 1, 3);
return false;
@@ -1779,19 +1784,19 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case ORDERED:
/* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
+ *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
return false;
case UNEQ:
/* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
+ *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
return false;
case LTGT:
/* (FLT(A, A) || FGT(B, B)). */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
+ *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
return false;
case UNGE:
@@ -1800,13 +1805,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case UNLT:
/* FLT or FLE, but guarded by an FFLAGS read and write. */
mode = GET_MODE (XEXP (x, 0));
- *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
+ *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
return false;
case MINUS:
case PLUS:
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = tune_param->fp_add[mode == DFmode];
else
*total = riscv_binary_cost (x, 1, 4);
return false;
@@ -1816,7 +1821,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
rtx op = XEXP (x, 0);
if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
{
- *total = (tune_info->fp_mul[mode == DFmode]
+ *total = (tune_param->fp_mul[mode == DFmode]
+ set_src_cost (XEXP (op, 0), mode, speed)
+ set_src_cost (XEXP (op, 1), mode, speed)
+ set_src_cost (XEXP (op, 2), mode, speed));
@@ -1825,23 +1830,23 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
}
if (float_mode_p)
- *total = tune_info->fp_add[mode == DFmode];
+ *total = tune_param->fp_add[mode == DFmode];
else
*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
return false;
case MULT:
if (float_mode_p)
- *total = tune_info->fp_mul[mode == DFmode];
+ *total = tune_param->fp_mul[mode == DFmode];
else if (!TARGET_MUL)
/* Estimate the cost of a library call. */
*total = COSTS_N_INSNS (speed ? 32 : 6);
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- *total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2);
+ *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
else if (!speed)
*total = COSTS_N_INSNS (1);
else
- *total = tune_info->int_mul[mode == DImode];
+ *total = tune_param->int_mul[mode == DImode];
return false;
case DIV:
@@ -1849,7 +1854,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case MOD:
if (float_mode_p)
{
- *total = tune_info->fp_div[mode == DFmode];
+ *total = tune_param->fp_div[mode == DFmode];
return false;
}
/* Fall through. */
@@ -1860,7 +1865,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
/* Estimate the cost of a library call. */
*total = COSTS_N_INSNS (speed ? 32 : 6);
else if (speed)
- *total = tune_info->int_div[mode == DImode];
+ *total = tune_param->int_div[mode == DImode];
else
*total = COSTS_N_INSNS (1);
return false;
@@ -1882,11 +1887,11 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case FIX:
case FLOAT_EXTEND:
case FLOAT_TRUNCATE:
- *total = tune_info->fp_add[mode == DFmode];
+ *total = tune_param->fp_add[mode == DFmode];
return false;
case FMA:
- *total = (tune_info->fp_mul[mode == DFmode]
+ *total = (tune_param->fp_mul[mode == DFmode]
+ set_src_cost (XEXP (x, 0), mode, speed)
+ set_src_cost (XEXP (x, 1), mode, speed)
+ set_src_cost (XEXP (x, 2), mode, speed));
@@ -4546,7 +4551,7 @@ riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
static int
riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
{
- return (tune_info->memory_cost
+ return (tune_param->memory_cost
+ memory_move_secondary_cost (mode, rclass, in));
}
@@ -4555,7 +4560,7 @@ riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
static int
riscv_issue_rate (void)
{
- return tune_info->issue_rate;
+ return tune_param->issue_rate;
}
/* Auxiliary function to emit RISC-V ELF attribute. */
@@ -4683,7 +4688,7 @@ riscv_init_machine_status (void)
static void
riscv_option_override (void)
{
- const struct riscv_cpu_info *cpu;
+ const struct riscv_tune_info *cpu;
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
@@ -4705,26 +4710,28 @@ riscv_option_override (void)
if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0)
target_flags |= MASK_FDIV;
- /* Handle -mtune. */
- cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string :
- RISCV_TUNE_STRING_DEFAULT);
+ /* Handle -mtune; use -mcpu's setting if -mtune is not given, and fall
+ back to the default -mtune if neither -mtune nor -mcpu is given. */
+ cpu = riscv_parse_tune (riscv_tune_string ? riscv_tune_string :
+ (riscv_cpu_string ? riscv_cpu_string :
+ RISCV_TUNE_STRING_DEFAULT));
riscv_microarchitecture = cpu->microarchitecture;
- tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info;
+ tune_param = optimize_size ? &optimize_size_tune_info : cpu->tune_param;
/* Use -mtune's setting for slow_unaligned_access, even when optimizing
for size. For architectures that trap and emulate unaligned accesses,
the performance cost is too great, even for -Os. Similarly, if
-m[no-]strict-align is left unspecified, heed -mtune's advice. */
- riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access
+ riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
|| TARGET_STRICT_ALIGN);
if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
- && cpu->tune_info->slow_unaligned_access)
+ && cpu->tune_param->slow_unaligned_access)
target_flags |= MASK_STRICT_ALIGN;
/* If the user hasn't specified a branch cost, use the processor's
default. */
if (riscv_branch_cost == 0)
- riscv_branch_cost = tune_info->branch_cost;
+ riscv_branch_cost = tune_param->branch_cost;
/* Function to allocate machine-dependent function status. */
init_machine_status = &riscv_init_machine_status;
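As an illustration of the resulting precedence (the processor names below
are examples only; the real spellings come from riscv-cores.def and the
tune table):

    gcc -mtune=rocket -mcpu=sifive-u74 ...   # -mtune wins
    gcc -mcpu=sifive-u74 ...                 # tune taken from the core's entry
    gcc ...                                  # RISCV_TUNE_STRING_DEFAULT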
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index b7b4a1c..172c7ca 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -41,17 +41,27 @@ along with GCC; see the file COPYING3. If not see
#endif
extern const char *riscv_expand_arch (int argc, const char **argv);
+extern const char *riscv_expand_arch_from_cpu (int argc, const char **argv);
+extern const char *riscv_default_mtune (int argc, const char **argv);
# define EXTRA_SPEC_FUNCTIONS \
- { "riscv_expand_arch", riscv_expand_arch },
+ { "riscv_expand_arch", riscv_expand_arch }, \
+ { "riscv_expand_arch_from_cpu", riscv_expand_arch_from_cpu }, \
+ { "riscv_default_mtune", riscv_default_mtune },
/* Support for a compile-time default CPU, et cetera. The rules are:
- --with-arch is ignored if -march is specified.
+ --with-arch is ignored if -march or -mcpu is specified.
--with-abi is ignored if -mabi is specified.
- --with-tune is ignored if -mtune is specified. */
+ --with-tune is ignored if -mtune or -mcpu is specified.
+
+ The default -march/-mtune values are still used if -mcpu does not name a valid processor. */
#define OPTION_DEFAULT_SPECS \
- {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
- {"arch", "%{!march=*:-march=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:" \
+ " %{!mcpu=*:-mtune=%(VALUE)}" \
+ " %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \
+ {"arch", "%{!march=*:" \
+ " %{!mcpu=*:-march=%(VALUE)}" \
+ " %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
#ifdef IN_LIBGCC2
@@ -69,8 +79,9 @@ extern const char *riscv_expand_arch (int argc, const char **argv);
%(subtarget_asm_spec)"
#undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS \
-"%{march=*:-march=%:riscv_expand_arch(%*)}"
+#define DRIVER_SELF_SPECS \
+"%{march=*:%:riscv_expand_arch(%*)} " \
+"%{!march=*:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} "
#define TARGET_DEFAULT_CMODEL CM_MEDLOW
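A rough sketch of what the new specs do for a hypothetical invocation; the
expansion shown is illustrative, and the actual strings come from the
core's riscv-cores.def entry via riscv_expand_arch_from_cpu and
riscv_default_mtune:

    gcc -mcpu=sifive-u74 foo.c
      ==> gcc -march=rv64imafdc -mtune=sifive-7-series foo.c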
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index f01d3ab..808b4a0 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -79,6 +79,10 @@ mtune=
Target RejectNegative Joined Var(riscv_tune_string)
-mtune=PROCESSOR Optimize the output for PROCESSOR.
+mcpu=
+Target RejectNegative Joined Var(riscv_cpu_string)
+-mcpu=PROCESSOR Use architecture of and optimize the output for PROCESSOR.
+
msmall-data-limit=
Target Joined Separate UInteger Var(g_switch_value) Init(8)
-msmall-data-limit=N Put global and static data smaller than <number> bytes into a special section (on some targets).
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 4820fb3..702767c 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -24,3 +24,5 @@ riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.c
$(POSTCOMPILE)
PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def
+
+$(common_out_file): $(srcdir)/config/riscv/riscv-cores.def
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 8a2dcda..df10a8c 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -236,6 +236,9 @@
#define vec_lvebx __builtin_vec_lvebx
#define vec_lvehx __builtin_vec_lvehx
#define vec_lvewx __builtin_vec_lvewx
+#define vec_xl_zext __builtin_vec_ze_lxvrx
+#define vec_xl_sext __builtin_vec_se_lxvrx
+#define vec_xst_trunc __builtin_vec_tr_stxvrx
#define vec_neg __builtin_vec_neg
#define vec_pmsum_be __builtin_vec_vpmsum
#define vec_shasigma_be __builtin_crypto_vshasigma
diff --git a/gcc/config/rs6000/freebsd64.h b/gcc/config/rs6000/freebsd64.h
index c991363..6984ca5 100644
--- a/gcc/config/rs6000/freebsd64.h
+++ b/gcc/config/rs6000/freebsd64.h
@@ -78,65 +78,7 @@ extern int dot_symbols;
#undef SUBSUBTARGET_OVERRIDE_OPTIONS
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
- do \
- { \
- if (!global_options_set.x_rs6000_alignment_flags) \
- rs6000_alignment_flags = MASK_ALIGN_NATURAL; \
- if (TARGET_64BIT) \
- { \
- if (DEFAULT_ABI != ABI_AIX) \
- { \
- rs6000_current_abi = ABI_AIX; \
- error (INVALID_64BIT, "call"); \
- } \
- dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
- if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \
- { \
- rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
- error (INVALID_64BIT, "relocatable"); \
- } \
- if (ELFv2_ABI_CHECK) \
- { \
- rs6000_current_abi = ABI_ELFv2; \
- if (dot_symbols) \
- error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \
- } \
- if (rs6000_isa_flags & OPTION_MASK_EABI) \
- { \
- rs6000_isa_flags &= ~OPTION_MASK_EABI; \
- error (INVALID_64BIT, "eabi"); \
- } \
- if (TARGET_PROTOTYPE) \
- { \
- target_prototype = 0; \
- error (INVALID_64BIT, "prototype"); \
- } \
- if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \
- { \
- rs6000_isa_flags |= OPTION_MASK_POWERPC64; \
- error ("%<-m64%> requires a PowerPC64 cpu"); \
- } \
- if ((rs6000_isa_flags_explicit \
- & OPTION_MASK_MINIMAL_TOC) != 0) \
- { \
- if (global_options_set.x_rs6000_current_cmodel \
- && rs6000_current_cmodel != CMODEL_SMALL) \
- error ("%<-mcmodel%> incompatible with other toc options"); \
- SET_CMODEL (CMODEL_SMALL); \
- } \
- else \
- { \
- if (!global_options_set.x_rs6000_current_cmodel) \
- SET_CMODEL (CMODEL_MEDIUM); \
- if (rs6000_current_cmodel != CMODEL_SMALL) \
- { \
- TARGET_NO_FP_IN_TOC = 0; \
- TARGET_NO_SUM_IN_TOC = 0; \
- } \
- } \
- } \
- } \
- while (0)
+ do rs6000_linux64_override_options (); while (0)
#undef ASM_SPEC
#undef LINK_OS_FREEBSD_SPEC
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 2ded330..73b6c01 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -96,90 +96,7 @@ extern int dot_symbols;
#undef SUBSUBTARGET_OVERRIDE_OPTIONS
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
- do \
- { \
- if (!global_options_set.x_rs6000_alignment_flags) \
- rs6000_alignment_flags = MASK_ALIGN_NATURAL; \
- if (rs6000_isa_flags & OPTION_MASK_64BIT) \
- { \
- if (DEFAULT_ABI != ABI_AIX) \
- { \
- rs6000_current_abi = ABI_AIX; \
- error (INVALID_64BIT, "call"); \
- } \
- dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
- if (ELFv2_ABI_CHECK) \
- { \
- rs6000_current_abi = ABI_ELFv2; \
- if (dot_symbols) \
- error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \
- } \
- if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \
- { \
- rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
- error (INVALID_64BIT, "relocatable"); \
- } \
- if (rs6000_isa_flags & OPTION_MASK_EABI) \
- { \
- rs6000_isa_flags &= ~OPTION_MASK_EABI; \
- error (INVALID_64BIT, "eabi"); \
- } \
- if (TARGET_PROTOTYPE) \
- { \
- target_prototype = 0; \
- error (INVALID_64BIT, "prototype"); \
- } \
- if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \
- { \
- rs6000_isa_flags |= OPTION_MASK_POWERPC64; \
- error ("%<-m64%> requires a PowerPC64 cpu"); \
- } \
- if ((rs6000_isa_flags_explicit \
- & OPTION_MASK_MINIMAL_TOC) != 0) \
- { \
- if (global_options_set.x_rs6000_current_cmodel \
- && rs6000_current_cmodel != CMODEL_SMALL) \
- error ("%<-mcmodel incompatible with other toc options%>"); \
- SET_CMODEL (CMODEL_SMALL); \
- } \
- else \
- { \
- if (!global_options_set.x_rs6000_current_cmodel) \
- SET_CMODEL (CMODEL_MEDIUM); \
- if (rs6000_current_cmodel != CMODEL_SMALL) \
- { \
- if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \
- TARGET_NO_FP_IN_TOC \
- = rs6000_current_cmodel == CMODEL_MEDIUM; \
- if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \
- TARGET_NO_SUM_IN_TOC = 0; \
- } \
- } \
- if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) \
- { \
- if (global_options_set.x_rs6000_pltseq) \
- warning (0, "%qs unsupported for this ABI", \
- "-mpltseq"); \
- rs6000_pltseq = false; \
- } \
- } \
- else \
- { \
- if (!RS6000_BI_ARCH_P) \
- error (INVALID_32BIT, "32"); \
- if (TARGET_PROFILE_KERNEL) \
- { \
- TARGET_PROFILE_KERNEL = 0; \
- error (INVALID_32BIT, "profile-kernel"); \
- } \
- if (global_options_set.x_rs6000_current_cmodel) \
- { \
- SET_CMODEL (CMODEL_SMALL); \
- error (INVALID_32BIT, "cmodel"); \
- } \
- } \
- } \
- while (0)
+ do rs6000_linux64_override_options (); while (0)
#undef ASM_SPEC
#undef LINK_OS_LINUX_SPEC
diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h
index 48edc99..e0bce9c 100644
--- a/gcc/config/rs6000/ppc-asm.h
+++ b/gcc/config/rs6000/ppc-asm.h
@@ -262,6 +262,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#undef toc
#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
+#ifdef __PCREL__
+#define JUMP_TARGET(name) GLUE(FUNC_NAME(name),@notoc)
+#define FUNC_START(name) \
+ .type FUNC_NAME(name),@function; \
+ .globl FUNC_NAME(name); \
+FUNC_NAME(name): \
+ .localentry FUNC_NAME(name),1
+#else
#define JUMP_TARGET(name) FUNC_NAME(name)
#define FUNC_START(name) \
.type FUNC_NAME(name),@function; \
@@ -270,6 +278,7 @@ FUNC_NAME(name): \
0: addis 2,12,(.TOC.-0b)@ha; \
addi 2,2,(.TOC.-0b)@l; \
.localentry FUNC_NAME(name),.-FUNC_NAME(name)
+#endif /* !__PCREL__ */
#define HIDDEN_FUNC(name) \
FUNC_START(name) \
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index e91a48d..5b05da8 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1111,7 +1111,7 @@
| RS6000_BTC_UNARY), \
CODE_FOR_ ## ICODE) /* ICODE */
-#define BU_P10_MISC_2(ENUM, NAME, ATTR, ICODE) \
+#define BU_P10_POWERPC64_MISC_2(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_2 (P10_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_" NAME, /* NAME */ \
RS6000_BTM_P10 \
@@ -1145,6 +1145,14 @@
CODE_FOR_ ## ICODE) /* ICODE */
#endif
+#define BU_P10V_OVERLOAD_X(ENUM, NAME) \
+ RS6000_BUILTIN_X (P10_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P10, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_SPECIAL), \
+ CODE_FOR_nothing) /* ICODE */
+
/* Power 10 Altivec builtins */
#define BU_P10V_AV_0(ENUM, NAME, ATTR, ICODE) \
@@ -1179,6 +1187,15 @@
| RS6000_BTC_TERNARY), \
CODE_FOR_ ## ICODE) /* ICODE */
+#define BU_P10V_AV_X(ENUM, NAME, ATTR) \
+ RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P10, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_SPECIAL), \
+ CODE_FOR_nothing) /* ICODE */
+
+
/* Insure 0 is not a legitimate index. */
BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC)
@@ -1474,6 +1491,18 @@ BU_ALTIVEC_X (LVSR, "lvsr", PURE)
BU_ALTIVEC_X (LVEBX, "lvebx", PURE)
BU_ALTIVEC_X (LVEHX, "lvehx", PURE)
BU_ALTIVEC_X (LVEWX, "lvewx", PURE)
+BU_P10V_AV_X (SE_LXVRBX, "se_lxvrbx", PURE)
+BU_P10V_AV_X (SE_LXVRHX, "se_lxvrhx", PURE)
+BU_P10V_AV_X (SE_LXVRWX, "se_lxvrwx", PURE)
+BU_P10V_AV_X (SE_LXVRDX, "se_lxvrdx", PURE)
+BU_P10V_AV_X (ZE_LXVRBX, "ze_lxvrbx", PURE)
+BU_P10V_AV_X (ZE_LXVRHX, "ze_lxvrhx", PURE)
+BU_P10V_AV_X (ZE_LXVRWX, "ze_lxvrwx", PURE)
+BU_P10V_AV_X (ZE_LXVRDX, "ze_lxvrdx", PURE)
+BU_P10V_AV_X (TR_STXVRBX, "tr_stxvrbx", MEM)
+BU_P10V_AV_X (TR_STXVRHX, "tr_stxvrhx", MEM)
+BU_P10V_AV_X (TR_STXVRWX, "tr_stxvrwx", MEM)
+BU_P10V_AV_X (TR_STXVRDX, "tr_stxvrdx", MEM)
BU_ALTIVEC_X (LVXL, "lvxl", PURE)
BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", PURE)
BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", PURE)
@@ -1740,6 +1769,9 @@ BU_ALTIVEC_OVERLOAD_X (LDL, "ldl")
BU_ALTIVEC_OVERLOAD_X (LVEBX, "lvebx")
BU_ALTIVEC_OVERLOAD_X (LVEHX, "lvehx")
BU_ALTIVEC_OVERLOAD_X (LVEWX, "lvewx")
+BU_P10V_OVERLOAD_X (SE_LXVRX, "se_lxvrx")
+BU_P10V_OVERLOAD_X (ZE_LXVRX, "ze_lxvrx")
+BU_P10V_OVERLOAD_X (TR_STXVRX, "tr_stxvrx")
BU_ALTIVEC_OVERLOAD_X (LVLX, "lvlx")
BU_ALTIVEC_OVERLOAD_X (LVLXL, "lvlxl")
BU_ALTIVEC_OVERLOAD_X (LVRX, "lvrx")
@@ -2727,11 +2759,11 @@ BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range")
BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
/* Builtins for scalar instructions added in ISA 3.1 (power10). */
-BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
-BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
-BU_P10_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
-BU_P10_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
-BU_P10_MISC_2 (PEXTD, "pextd", CONST, pextd)
+BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
+BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
+BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
+BU_P10_POWERPC64_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
+BU_P10_POWERPC64_MISC_2 (PEXTD, "pextd", CONST, pextd)
/* Builtins for vector instructions added in ISA 3.1 (power10). */
BU_P10V_AV_2 (VCLRLB, "vclrlb", CONST, vclrlb)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index f5982907..cc1e997 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -597,6 +597,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
/* Tell the user if we support the MMA instructions. */
if ((flags & OPTION_MASK_MMA) != 0)
rs6000_define_or_undefine_macro (define_p, "__MMA__");
+ /* Whether pc-relative code is being generated. */
+ if ((flags & OPTION_MASK_PCREL) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__PCREL__");
}
void
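A quick feature-test sketch using the new predefine; __PCREL__ is only
defined while pc-relative code generation is active:

    #ifdef __PCREL__
    /* e.g. -mcpu=power10 with a pc-relative-capable code model. */
    #endif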
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index a8b52083..b044778 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -57,16 +57,14 @@
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
-#include "gimple-ssa.h"
+#include "ssa.h"
+#include "tree-ssa-propagate.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#if TARGET_XCOFF
#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
#endif
#include "ppc-auxv.h"
-#include "tree-ssa-propagate.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
#include "targhooks.h"
#include "opts.h"
@@ -1154,6 +1152,65 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ /* vector signed __int128 vec_xl_sext (signed long long, signed char *);
+ vector signed __int128 vec_xl_sext (signed long long, signed short *);
+ vector signed __int128 vec_xl_sext (signed long long, signed int *);
+ vector signed __int128 vec_xl_sext (signed long long, signed long long *); */
+ { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRBX,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRHX,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRWX,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+ { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+
+ /* vector unsigned __int128 vec_xl_zext (signed long long, unsigned char *);
+ vector unsigned __int128 vec_xl_zext (signed long long, unsigned short *);
+ vector unsigned __int128 vec_xl_zext (signed long long, unsigned int *);
+ vector unsigned __int128 vec_xl_zext (signed long long, unsigned long long *); */
+ { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRBX,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRHX,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRWX,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+ { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
+
+ /* void vec_xst_trunc (vector signed __int128, signed long long, signed char *);
+ void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *);
+ void vec_xst_trunc (vector signed __int128, signed long long, signed short *);
+ void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned short *);
+ void vec_xst_trunc (vector signed __int128, signed long long, signed int *);
+ void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned int *);
+ void vec_xst_trunc (vector signed __int128, signed long long, signed long long *);
+ void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned long long *); */
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
+ RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI },
+ { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI },
+
/* vector float vec_ldl (int, vector float *);
vector float vec_ldl (int, float *); */
{ ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
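A minimal usage sketch of the new load overloads, assuming -mcpu=power10
and the vec_xl_sext/vec_xl_zext names defined in altivec.h above:

    #include <altivec.h>

    vector signed __int128
    load_sext (long long off, signed int *p)
    {
      return vec_xl_sext (off, p);   /* __builtin_vec_se_lxvrx -> lxvrwx */
    }

    vector unsigned __int128
    load_zext (long long off, unsigned short *p)
    {
      return vec_xl_zext (off, p);   /* __builtin_vec_ze_lxvrx -> lxvrhx */
    }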
@@ -9576,6 +9633,85 @@ swap_endian_selector_for_mode (machine_mode mode)
gen_rtvec_v (16, perm)));
}
+/* Expand the load-and-sign-extend-rightmost-element and
+ load-and-zero-extend-rightmost-element builtins. */
+static rtx
+altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool blk, bool sign_extend)
+{
+ rtx pat, addr;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode smode = insn_data[icode].operand[1].mode;
+ machine_mode mode0 = Pmode;
+ machine_mode mode1 = Pmode;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+ /* If we got invalid arguments bail out before generating bad rtl. */
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (op0 == const0_rtx)
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+ else
+ {
+ op0 = copy_to_mode_reg (mode0, op0);
+ addr = gen_rtx_MEM (blk ? BLKmode : smode,
+ gen_rtx_PLUS (Pmode, op1, op0));
+ }
+
+ if (sign_extend)
+ {
+ rtx discratch = gen_reg_rtx (DImode);
+ rtx tiscratch = gen_reg_rtx (TImode);
+
+ /* Emit the lxvr*x insn. */
+ pat = GEN_FCN (icode) (tiscratch, addr);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ /* Emit a sign extension from QI, HI or SI mode to DI mode. */
+ rtx scratch = gen_lowpart (smode, tiscratch);
+ if (icode == CODE_FOR_vsx_lxvrbx)
+ emit_insn (gen_extendqidi2 (discratch, scratch));
+ else if (icode == CODE_FOR_vsx_lxvrhx)
+ emit_insn (gen_extendhidi2 (discratch, scratch));
+ else if (icode == CODE_FOR_vsx_lxvrwx)
+ emit_insn (gen_extendsidi2 (discratch, scratch));
+ /* Assign discratch directly if scratch is already DI. */
+ if (icode == CODE_FOR_vsx_lxvrdx)
+ discratch = scratch;
+
+ /* Emit the sign extension from DI (double) to TI (quad). */
+ emit_insn (gen_extendditi2 (target, discratch));
+
+ return target;
+ }
+ else
+ {
+ /* Zero extend. */
+ pat = GEN_FCN (icode) (target, addr);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+ }
+ return 0;
+}
+
static rtx
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
{
@@ -9694,7 +9830,7 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
- rtx pat, addr, rawaddr;
+ rtx pat, addr, rawaddr, truncrtx;
machine_mode tmode = insn_data[icode].operand[0].mode;
machine_mode smode = insn_data[icode].operand[1].mode;
machine_mode mode1 = Pmode;
@@ -9733,6 +9869,25 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
emit_insn (gen_rtx_SET (addr, op0));
}
+ else if (icode == CODE_FOR_vsx_stxvrbx
+ || icode == CODE_FOR_vsx_stxvrhx
+ || icode == CODE_FOR_vsx_stxvrwx
+ || icode == CODE_FOR_vsx_stxvrdx)
+ {
+ truncrtx = gen_rtx_TRUNCATE (tmode, op0);
+ op0 = copy_to_mode_reg (E_TImode, truncrtx);
+
+ if (op1 == const0_rtx)
+ addr = gen_rtx_MEM (Pmode, op2);
+ else
+ {
+ op1 = copy_to_mode_reg (mode1, op1);
+ addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
+ }
+ pat = GEN_FCN (icode) (addr, op0);
+ if (pat)
+ emit_insn (pat);
+ }
else
{
if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
@@ -10752,6 +10907,16 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
case ALTIVEC_BUILTIN_STVEWX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
+
+ case P10_BUILTIN_TR_STXVRBX:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrbx, exp);
+ case P10_BUILTIN_TR_STXVRHX:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrhx, exp);
+ case P10_BUILTIN_TR_STXVRWX:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrwx, exp);
+ case P10_BUILTIN_TR_STXVRDX:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrdx, exp);
+
case ALTIVEC_BUILTIN_STVXL_V2DF:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
case ALTIVEC_BUILTIN_STVXL_V2DI:
@@ -11014,6 +11179,30 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_LVEWX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
exp, target, false);
+ case P10_BUILTIN_SE_LXVRBX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx,
+ exp, target, false, true);
+ case P10_BUILTIN_SE_LXVRHX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx,
+ exp, target, false, true);
+ case P10_BUILTIN_SE_LXVRWX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx,
+ exp, target, false, true);
+ case P10_BUILTIN_SE_LXVRDX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx,
+ exp, target, false, true);
+ case P10_BUILTIN_ZE_LXVRBX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx,
+ exp, target, false, false);
+ case P10_BUILTIN_ZE_LXVRHX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx,
+ exp, target, false, false);
+ case P10_BUILTIN_ZE_LXVRWX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx,
+ exp, target, false, false);
+ case P10_BUILTIN_ZE_LXVRDX:
+ return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx,
+ exp, target, false, false);
case ALTIVEC_BUILTIN_LVXL_V2DF:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
exp, target, false);
@@ -12916,15 +13105,13 @@ rs6000_init_builtins (void)
/* Vector pair and vector quad support. */
if (TARGET_EXTRA_BUILTINS)
{
- tree oi_uns_type = make_unsigned_type (256);
- vector_pair_type_node = build_distinct_type_copy (oi_uns_type);
+ vector_pair_type_node = make_unsigned_type (256);
SET_TYPE_MODE (vector_pair_type_node, POImode);
layout_type (vector_pair_type_node);
lang_hooks.types.register_builtin_type (vector_pair_type_node,
"__vector_pair");
- tree xi_uns_type = make_unsigned_type (512);
- vector_quad_type_node = build_distinct_type_copy (xi_uns_type);
+ vector_quad_type_node = make_unsigned_type (512);
SET_TYPE_MODE (vector_quad_type_node, PXImode);
layout_type (vector_quad_type_node);
lang_hooks.types.register_builtin_type (vector_quad_type_node,
@@ -13298,6 +13485,18 @@ altivec_init_builtins (void)
def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
+ def_builtin ("__builtin_altivec_se_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRBX);
+ def_builtin ("__builtin_altivec_se_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRHX);
+ def_builtin ("__builtin_altivec_se_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRWX);
+ def_builtin ("__builtin_altivec_se_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRDX);
+ def_builtin ("__builtin_altivec_ze_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRBX);
+ def_builtin ("__builtin_altivec_ze_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRHX);
+ def_builtin ("__builtin_altivec_ze_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRWX);
+ def_builtin ("__builtin_altivec_ze_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRDX);
+ def_builtin ("__builtin_altivec_tr_stxvrbx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRBX);
+ def_builtin ("__builtin_altivec_tr_stxvrhx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRHX);
+ def_builtin ("__builtin_altivec_tr_stxvrwx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRWX);
+ def_builtin ("__builtin_altivec_tr_stxvrdx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRDX);
def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
ALTIVEC_BUILTIN_LVXL_V2DF);
@@ -13363,6 +13562,9 @@ altivec_init_builtins (void)
def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
+ def_builtin ("__builtin_vec_se_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_SE_LXVRX);
+ def_builtin ("__builtin_vec_ze_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_ZE_LXVRX);
+ def_builtin ("__builtin_vec_tr_stxvrx", void_ftype_opaque_long_pvoid, P10_BUILTIN_VEC_TR_STXVRX);
def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h
index 9caef01..32681b6 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -32,7 +32,7 @@ typedef struct rs6000_stack {
int cr_save_p; /* true if the CR reg needs to be saved */
unsigned int vrsave_mask; /* mask of vec registers to save */
int push_p; /* true if we need to allocate stack space */
- int calls_p; /* true if the function makes any calls */
+ int calls_p; /* true if there are non-sibling calls */
int world_save_p; /* true if we're saving *everything*:
r13-r31, cr, f14-f31, vrsave, v20-v31 */
enum rs6000_abi abi; /* which ABI to use */
diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index 0f88ec1..d90cd57 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -714,7 +714,7 @@ rs6000_stack_info (void)
info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
- info->first_altivec_reg_save);
- /* Does this function call anything? */
+ /* Does this function call anything (apart from sibling calls)? */
info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
/* Determine if we need to save the condition code registers. */
@@ -5479,7 +5479,18 @@ rs6000_expand_split_stack_prologue (void)
gcc_assert (flag_split_stack && reload_completed);
if (!info->push_p)
- return;
+ {
+ /* We need the -fsplit-stack prologue for functions that make
+ tail calls. Tail calls don't count against crtl->is_leaf.
+ Note that we are called inside a sequence. get_insns will
+ just return that (as yet empty) sequence, so instead we
+ access the function rtl with get_topmost_sequence. */
+ for (insn = get_topmost_sequence ()->first; insn; insn = NEXT_INSN (insn))
+ if (CALL_P (insn))
+ break;
+ if (!insn)
+ return;
+ }
if (global_regs[29])
{
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6f204ca..4d528a3 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -59,11 +59,12 @@
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
-#include "gimple-ssa.h"
#include "gimple-walk.h"
+#include "ssa.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
-#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
@@ -75,9 +76,6 @@
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
-#include "tree-ssa-propagate.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"
@@ -3451,6 +3449,96 @@ rs6000_override_options_after_change (void)
flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
}
+#ifdef TARGET_USES_LINUX64_OPT
+static void
+rs6000_linux64_override_options ()
+{
+ if (!global_options_set.x_rs6000_alignment_flags)
+ rs6000_alignment_flags = MASK_ALIGN_NATURAL;
+ if (rs6000_isa_flags & OPTION_MASK_64BIT)
+ {
+ if (DEFAULT_ABI != ABI_AIX)
+ {
+ rs6000_current_abi = ABI_AIX;
+ error (INVALID_64BIT, "call");
+ }
+ dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
+ if (ELFv2_ABI_CHECK)
+ {
+ rs6000_current_abi = ABI_ELFv2;
+ if (dot_symbols)
+ error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
+ }
+ if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
+ error (INVALID_64BIT, "relocatable");
+ }
+ if (rs6000_isa_flags & OPTION_MASK_EABI)
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_EABI;
+ error (INVALID_64BIT, "eabi");
+ }
+ if (TARGET_PROTOTYPE)
+ {
+ target_prototype = 0;
+ error (INVALID_64BIT, "prototype");
+ }
+ if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
+ {
+ rs6000_isa_flags |= OPTION_MASK_POWERPC64;
+ error ("%<-m64%> requires a PowerPC64 cpu");
+ }
+ if (!global_options_set.x_rs6000_current_cmodel)
+ SET_CMODEL (CMODEL_MEDIUM);
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
+ {
+ if (global_options_set.x_rs6000_current_cmodel
+ && rs6000_current_cmodel != CMODEL_SMALL)
+ error ("%<-mcmodel incompatible with other toc options%>");
+ if (TARGET_MINIMAL_TOC)
+ SET_CMODEL (CMODEL_SMALL);
+ else if (TARGET_PCREL
+ || (PCREL_SUPPORTED_BY_OS
+ && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
+ /* Ignore -mno-minimal-toc. */
+ ;
+ else
+ SET_CMODEL (CMODEL_SMALL);
+ }
+ if (rs6000_current_cmodel != CMODEL_SMALL)
+ {
+ if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
+ TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
+ if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
+ TARGET_NO_SUM_IN_TOC = 0;
+ }
+ if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
+ {
+ if (global_options_set.x_rs6000_pltseq)
+ warning (0, "%qs unsupported for this ABI",
+ "-mpltseq");
+ rs6000_pltseq = false;
+ }
+ }
+ else if (TARGET_64BIT)
+ error (INVALID_32BIT, "32");
+ else
+ {
+ if (TARGET_PROFILE_KERNEL)
+ {
+ profile_kernel = 0;
+ error (INVALID_32BIT, "profile-kernel");
+ }
+ if (global_options_set.x_rs6000_current_cmodel)
+ {
+ SET_CMODEL (CMODEL_SMALL);
+ error (INVALID_32BIT, "cmodel");
+ }
+ }
+}
+#endif
+
/* Override command line options.
Combine build-specific configuration information with options
@@ -4236,7 +4324,9 @@ rs6000_option_override_internal (bool global_init_p)
}
/* Enable Altivec ABI for AIX -maltivec. */
- if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
+ if (TARGET_XCOFF
+ && (TARGET_ALTIVEC || TARGET_VSX)
+ && !global_options_set.x_rs6000_altivec_abi)
{
if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
error ("target attribute or pragma changes AltiVec ABI");
@@ -5731,7 +5821,7 @@ direct_return (void)
/* Helper for num_insns_constant. Calculate number of instructions to
load VALUE to a single gpr using combinations of addi, addis, ori,
- oris and sldi instructions. */
+ oris, sldi and rldimi instructions. */
static int
num_insns_constant_gpr (HOST_WIDE_INT value)
@@ -5759,7 +5849,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
high >>= 1;
- if (low == 0)
+ if (low == 0 || low == high)
return num_insns_constant_gpr (high) + 1;
else if (high == 0)
return num_insns_constant_gpr (low) + 1;
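For example, with the illustrative constant 0x1234567812345678 the two
halves are equal, so one rldimi finishes the job after the 32-bit half is
built:

    lis    3,0x1234
    ori    3,3,0x5678
    rldimi 3,3,32,0    # replicate the low word into the high word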
@@ -8364,7 +8454,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
low_int = 0;
high_int = INTVAL (XEXP (x, 1)) - low_int;
sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
- GEN_INT (high_int)), 0);
+ gen_int_mode (high_int, Pmode)), 0);
return plus_constant (Pmode, sum, low_int);
}
else if (GET_CODE (x) == PLUS
@@ -9020,15 +9110,21 @@ rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
bool reg_offset_p = reg_offset_addressing_ok_p (mode);
bool quad_offset_p = mode_supports_dq_form (mode);
- /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
+ if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
+ return 0;
+
+ /* Handle unaligned altivec lvx/stvx type addresses. */
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -16)
- x = XEXP (x, 0);
+ {
+ x = XEXP (x, 0);
+ return (legitimate_indirect_address_p (x, reg_ok_strict)
+ || legitimate_indexed_address_p (x, reg_ok_strict)
+ || virtual_stack_registers_memory_p (x));
+ }
- if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
- return 0;
if (legitimate_indirect_address_p (x, reg_ok_strict))
return 1;
if (TARGET_UPDATE
@@ -21176,9 +21272,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
return true;
}
else if ((outer_code == PLUS
- && reg_or_add_cint_operand (x, VOIDmode))
+ && reg_or_add_cint_operand (x, mode))
|| (outer_code == MINUS
- && reg_or_sub_cint_operand (x, VOIDmode))
+ && reg_or_sub_cint_operand (x, mode))
|| ((outer_code == SET
|| outer_code == IOR
|| outer_code == XOR)
@@ -26957,11 +27053,10 @@ rs6000_const_f32_to_i32 (rtx operand)
void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
- printf("rs6000_emit_xxspltidp_v2df called %ld\n", value);
- printf("rs6000_emit_xxspltidp_v2df called 0x%lx\n", value);
if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
inform (input_location,
- "the result for the xxspltidp instruction is undefined for subnormal input values.\n");
+ "the result for the xxspltidp instruction "
+ "is undefined for subnormal input values");
emit_insn( gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 694ff70..dc06014 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -11554,7 +11554,7 @@
""
{
/* Everything is best done with setbc[r] if available. */
- if (TARGET_POWER10)
+ if (TARGET_POWER10 && TARGET_ISEL)
rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
/* Expanding EQ and NE directly to some machine instructions does not help
@@ -12697,12 +12697,7 @@
""
{
if (rs6000_speculate_indirect_jumps)
- {
- if (TARGET_32BIT)
- emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
- else
- emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
- }
+ emit_jump_insn (gen_tablejump_normal (Pmode, operands[0], operands[1]));
else
{
rtx ccreg = gen_reg_rtx (CCmode);
@@ -12716,69 +12711,57 @@
DONE;
})
-(define_expand "tablejumpsi"
- [(set (match_dup 3)
- (plus:SI (match_operand:SI 0)
- (match_dup 2)))
- (parallel [(set (pc)
- (match_dup 3))
- (use (label_ref (match_operand 1)))])]
- "TARGET_32BIT && rs6000_speculate_indirect_jumps"
+(define_expand "@tablejump<mode>_normal"
+ [(use (match_operand:SI 0))
+ (use (match_operand:P 1))]
+ "rs6000_speculate_indirect_jumps"
{
+ rtx off;
operands[0] = force_reg (SImode, operands[0]);
- operands[2] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1]));
- operands[3] = gen_reg_rtx (SImode);
+ if (<MODE>mode == SImode)
+ off = operands[0];
+ else
+ {
+ off = gen_reg_rtx (Pmode);
+ rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]);
+ emit_move_insn (off, src);
+ }
+
+ rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1]));
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_add<mode>3 (addr, off, lab));
+ emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
+ DONE;
})
-(define_expand "tablejumpsi_nospec"
- [(set (match_dup 4)
- (plus:SI (match_operand:SI 0)
- (match_dup 3)))
- (parallel [(set (pc)
- (match_dup 4))
- (use (label_ref (match_operand 1)))
- (clobber (match_operand 2))])]
- "TARGET_32BIT && !rs6000_speculate_indirect_jumps"
+(define_expand "@tablejump<mode>_nospec"
+ [(use (match_operand:SI 0))
+ (use (match_operand:P 1))
+ (use (match_operand:CC 2))]
+ "!rs6000_speculate_indirect_jumps"
{
+ rtx off;
operands[0] = force_reg (SImode, operands[0]);
- operands[3] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1]));
- operands[4] = gen_reg_rtx (SImode);
-})
+ if (<MODE>mode == SImode)
+ off = operands[0];
+ else
+ {
+ off = gen_reg_rtx (Pmode);
+ rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]);
+ emit_move_insn (off, src);
+ }
-(define_expand "tablejumpdi"
- [(set (match_dup 4)
- (sign_extend:DI (match_operand:SI 0 "lwa_operand")))
- (set (match_dup 3)
- (plus:DI (match_dup 4)
- (match_dup 2)))
- (parallel [(set (pc)
- (match_dup 3))
- (use (label_ref (match_operand 1)))])]
- "TARGET_64BIT && rs6000_speculate_indirect_jumps"
-{
- operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1]));
- operands[3] = gen_reg_rtx (DImode);
- operands[4] = gen_reg_rtx (DImode);
-})
+ rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1]));
+ rtx addr = gen_reg_rtx (Pmode);
-(define_expand "tablejumpdi_nospec"
- [(set (match_dup 5)
- (sign_extend:DI (match_operand:SI 0 "lwa_operand")))
- (set (match_dup 4)
- (plus:DI (match_dup 5)
- (match_dup 3)))
- (parallel [(set (pc)
- (match_dup 4))
- (use (label_ref (match_operand 1)))
- (clobber (match_operand 2))])]
- "TARGET_64BIT && !rs6000_speculate_indirect_jumps"
-{
- operands[3] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1]));
- operands[4] = gen_reg_rtx (DImode);
- operands[5] = gen_reg_rtx (DImode);
+ emit_insn (gen_add<mode>3 (addr, off, lab));
+ emit_jump_insn (gen_tablejump_insn_nospec (Pmode, addr, operands[1],
+ operands[2]));
+ DONE;
})
-(define_insn "*tablejump<mode>_internal1"
+(define_insn "@tablejump<mode>_insn_normal"
[(set (pc)
(match_operand:P 0 "register_operand" "c,*l"))
(use (label_ref (match_operand 1)))]
@@ -12786,7 +12769,7 @@
"b%T0"
[(set_attr "type" "jmpreg")])
-(define_insn "*tablejump<mode>_internal1_nospec"
+(define_insn "@tablejump<mode>_insn_nospec"
[(set (pc)
(match_operand:P 0 "register_operand" "c,*l"))
(use (label_ref (match_operand 1)))
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index d78ddba..4c0fc86 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -42,6 +42,36 @@
#include <altivec.h>
#include <tmmintrin.h>
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
+{
+ __v16qi result = (__v16qi)__A;
+
+ result [__N & 0xf] = __D;
+
+ return (__m128i) result;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
+{
+ __v4si result = (__v4si)__A;
+
+ result [__N & 3] = __D;
+
+ return (__m128i) result;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
+{
+ __v2di result = (__v2di)__A;
+
+ result [__N & 1] = __D;
+
+ return (__m128i) result;
+}
+
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
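Usage sketch for the new SSE4.1-compatibility insert intrinsics; as in the
x86 originals, the lane index is masked to the element count:

    #include <smmintrin.h>

    __m128i
    set_lane3 (__m128i v)
    {
      return _mm_insert_epi32 (v, 42, 3);   /* element 3 of v becomes 42 */
    }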
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4ff5245..d6347db 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -352,6 +352,8 @@
UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
UNSPEC_XXGENPCV
UNSPEC_MTVSBM
+ UNSPEC_EXTENDDITI2
+ UNSPEC_MTVSRD_DITI_W1
UNSPEC_VCNTMB
UNSPEC_VEXPAND
UNSPEC_VEXTRACT
@@ -1253,6 +1255,24 @@
}
})
+;; Load and zero extend the rightmost element of a memory operand
+;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
+(define_insn "vsx_lxvr<wd>x"
+ [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+ (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
+ "TARGET_POWER10"
+ "lxvr<wd>x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+;; Truncate and store the rightmost element of a vector register
+;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
+(define_insn "vsx_stxvr<wd>x"
+ [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
+ (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
+ "TARGET_POWER10"
+ "stxvr<wd>x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
@@ -4795,6 +4815,37 @@
"vextsw2d %0,%1"
[(set_attr "type" "vecexts")])
+;; ISA 3.1 vector sign extend
+;; Move DI value from GPR to TI mode in VSX register, word 1.
+(define_insn "mtvsrdd_diti_w1"
+ [(set (match_operand:TI 0 "register_operand" "=wa")
+ (unspec:TI [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_MTVSRD_DITI_W1))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrdd %x0,0,%1"
+ [(set_attr "type" "vecmove")])
+
+;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
+(define_insn "extendditi2_vector"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
+ (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
+ UNSPEC_EXTENDDITI2))]
+ "TARGET_POWER10"
+ "vextsd2q %0,%1"
+ [(set_attr "type" "vecexts")])
+
+(define_expand "extendditi2"
+ [(set (match_operand:TI 0 "gpc_reg_operand")
+ (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))]
+ "TARGET_POWER10"
+ {
+ /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */
+ rtx temp = gen_reg_rtx (TImode);
+ emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1]));
+ emit_insn (gen_extendditi2_vector (operands[0], temp));
+ DONE;
+ })
+
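A hedged source-level sketch that should reach this expander when compiled
with -mcpu=power10; the mtvsrdd/vextsd2q selection is the expected, not
guaranteed, outcome:

    __int128
    widen (long long x)
    {
      return x;   /* DImode -> TImode sign extension */
    }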
;; ISA 3.0 Binary Floating-Point Support
@@ -5659,7 +5710,7 @@
{
int i;
int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
- int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
+ int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
rtx rvals[16];
rtx mask = gen_reg_rtx (V16QImode);
@@ -5693,7 +5744,7 @@
"TARGET_P9_VECTOR"
{
int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
- int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
+ int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
int i;
rtx rvals[16];
@@ -6035,7 +6086,7 @@
(match_operand:QI 2 "const_0_to_1_operand" "n")]
UNSPEC_VCNTMB))]
"TARGET_POWER10"
- "vcntmb<VSX_MM_SUFFIX> %0,%1,%2"
+ "vcntmb<wd> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "vec_extract_<mode>"
@@ -6043,7 +6094,7 @@
(unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
UNSPEC_VEXTRACT))]
"TARGET_POWER10"
- "vextract<VSX_MM_SUFFIX>m %0,%1"
+ "vextract<wd>m %0,%1"
[(set_attr "type" "vecsimple")])
(define_insn "vec_expand_<mode>"
@@ -6051,5 +6102,5 @@
(unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
UNSPEC_VEXPAND))]
"TARGET_POWER10"
- "vexpand<VSX_MM_SUFFIX>m %0,%1"
+ "vexpand<wd>m %0,%1"
[(set_attr "type" "vecsimple")])
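With the generic <wd> mode attribute these patterns now emit vcntmbb,
vcntmbh, vcntmbw and vcntmbd (and likewise vextract[bhwd]m and
vexpand[bhwd]m), one per element mode of VSX_MM, matching what the removed
VSX_MM_SUFFIX attribute produced.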
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
index 771dddf..87ca3af 100644
--- a/gcc/config/rs6000/vxworks.h
+++ b/gcc/config/rs6000/vxworks.h
@@ -18,10 +18,21 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* Note to future editors: VxWorks is mostly an EABI target. We do
- not use rs6000/eabi.h because we would have to override most of
- it anyway. However, if you change that file, consider making
- analogous changes here too. */
+/* The port comes in two very different flavors at this stage:
+
+ - For 653 (AE) and regular versions prior to VxWorks 7, the port
+ comes with its own set of definitions, matching a system compiler
+ configured this way as well as the corresponding run-time
+ environment. This is essentially an eabi system, so changes to
+ eabi.h should usually be reflected here.
+
+ - Starting with VxWorks 7 (post SR600), the system environment
+ was made extremely similar to GNU/Linux and this toolchain is
+ built on top of the corresponding header files. */
+
+/*-------------------------------------------------------------*/
+/* Common definitions first. */
+/*-------------------------------------------------------------*/
/* CPP predefined macros. */
@@ -29,111 +40,156 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_OS_CPP_BUILTINS() \
do \
{ \
- builtin_define ("__ppc"); \
- builtin_define ("__PPC__"); \
- builtin_define ("__EABI__"); \
builtin_define ("__ELF__"); \
+ if (!TARGET_VXWORKS7) \
+ builtin_define ("__EABI__"); \
+ \
+ /* CPU macros, based on what the system compilers do. */ \
+ if (!TARGET_VXWORKS7) \
+ { \
+ builtin_define ("__ppc"); \
+ /* Namespace violation below, but the system headers \
+ really depend heavily on this. */ \
+ builtin_define ("CPU_FAMILY=PPC"); \
+ \
+ /* __PPC__ isn't actually emitted by the system compiler \
+ prior to vx7 but has been advertised by us for ages. */ \
+ builtin_define ("__PPC__"); \
+ } \
+ else \
+ { \
+ builtin_define ("__PPC__"); \
+ builtin_define ("__powerpc__"); \
+ if (TARGET_64BIT) \
+ { \
+ builtin_define ("__PPC64__"); \
+ builtin_define ("__powerpc64__"); \
+ } \
+ else \
+ { \
+ builtin_define ("__PPC"); \
+ builtin_define ("__powerpc"); \
+ } \
+ } \
+ \
+ /* Asserts for #cpu and #machine. */ \
+ if (TARGET_64BIT) \
+ { \
+ builtin_assert ("cpu=powerpc64"); \
+ builtin_assert ("machine=powerpc64"); \
+ } \
+ else \
+ { \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ } \
+ \
+ /* PowerPC VxWorks specifics. */ \
if (!TARGET_SOFT_FLOAT) \
- builtin_define ("__hardfp"); \
+ { \
+ builtin_define ("__hardfp"); \
+ builtin_define ("_WRS_HARDWARE_FP"); \
+ } \
\
- /* C89 namespace violation! */ \
- builtin_define ("CPU_FAMILY=PPC"); \
- \
+ /* Common VxWorks and port items. */ \
VXWORKS_OS_CPP_BUILTINS (); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
} \
while (0)
-/* vx6 library path. */
-#if !TARGET_VXWORKS7
-#undef STARTFILE_PREFIX_SPEC
-#define STARTFILE_PREFIX_SPEC \
- "%{mrtp:%{!shared:%:getenv(WIND_BASE /target/lib/usr/lib/ppc/PPC32/common)}}"
+/* Specific CPU macro definitions expected by the system headers,
+ inferred from -mcpu requests by the user. Different versions of
+ VxWorks expect different forms of macros, such as
+
+ -D_VX_CPU=_VX_PPC403 on Vx7 and some variants of Vx6,
+ -DCPU=PPC403 on all Vx6 and earlier. */
+
+#if TARGET_VXWORKS7
+#define VX_CPU_PREFIX "_VX_"
+#else
+#define VX_CPU_PREFIX ""
#endif
-/* Only big endian PPC is supported by VxWorks. */
-#undef BYTES_BIG_ENDIAN
-#define BYTES_BIG_ENDIAN 1
-#undef WORDS_BIG_ENDIAN
-#define WORDS_BIG_ENDIAN 1
+#define VX_CPUDEF(CPUID) \
+ ":-D" VX_CPU_PREFIX "CPU=" VX_CPU_PREFIX #CPUID
-/* We have to kill off the entire specs set created by rs6000/sysv4.h
- and substitute our own set. The top level vxworks.h has done some
- of this for us. */
+#define VX_MCPU(CPU,CPUID) \
+ "mcpu=" #CPU VX_CPUDEF(CPUID)
-#undef SUBTARGET_EXTRA_SPECS
#undef CPP_SPEC
-#undef CC1_SPEC
-#undef ASM_SPEC
-
-#define SUBTARGET_EXTRA_SPECS /* none needed */
+#define CPP_SPEC \
+ "%{!D" VX_CPU_PREFIX "CPU=*:%{" \
+ VX_MCPU(403, PPC403) ";" \
+ VX_MCPU(405, PPC405) ";" \
+ VX_MCPU(440, PPC440) ";" \
+ VX_MCPU(464, PPC464) ";" \
+ VX_MCPU(476, PPC476) ";" \
+ VX_MCPU(603, PPC603) ";" \
+ VX_MCPU(604, PPC604) ";" \
+ VX_MCPU(860, PPC860) ";" \
+ VX_MCPU(e6500, E6500) ";" \
+ VX_MCPU(8540, PPC85XX) ";" \
+ VX_MCPU(8548, PPC85XX) ";" \
+ VX_CPUDEF(PPC604) \
+ "}}" \
+ VXWORKS_ADDITIONAL_CPP_SPEC
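Following the VX_CPU_PREFIX logic above, a hypothetical invocation expands
as follows (driver name illustrative):

    powerpc-wrs-vxworks-gcc -mcpu=8548 ...
      pre-Vx7:  -DCPU=PPC85XX
      Vx7:      -D_VX_CPU=_VX_PPC85XX

with no -mcpu at all falling back to the PPC604 definition.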
/* FIXME: The only reason we allow no -mcpu switch at all is because
- config-ml.in insists on a "." multilib. */
-#define CPP_SPEC \
-"%{!DCPU=*: \
- %{mcpu=403 : -DCPU=PPC403 ; \
- mcpu=405 : -DCPU=PPC405 ; \
- mcpu=440 : -DCPU=PPC440 ; \
- mcpu=464 : -DCPU=PPC464 ; \
- mcpu=476 : -DCPU=PPC476 ; \
- mcpu=603 : -DCPU=PPC603 ; \
- mcpu=604 : -DCPU=PPC604 ; \
- mcpu=860 : -DCPU=PPC860 ; \
- mcpu=8540: -DCPU=PPC85XX ; \
- mcpu=8548: -DCPU=PPC85XX ; \
- : -DCPU=PPC604 }}" \
-VXWORKS_ADDITIONAL_CPP_SPEC
-
-#define CC1_SPEC \
-"%{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \
- %{mlittle|mlittle-endian:-mstrict-align}"
-
-#define ASM_SPEC \
-"%(asm_cpu) \
- %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \
- %{mrelocatable} %{mrelocatable-lib} %{" FPIC_SPEC ":-K PIC} -mbig"
+ config-ml.in insists on a "." multilib. */
#undef LIB_SPEC
#define LIB_SPEC VXWORKS_LIB_SPEC
-/* For RTPs, leverage linker relaxation. This helps programs referring
- to, typically, kernel services too far away for short calls. This is more
- precise than -mlongcall and can be overriden with -Wl,--no-relax. */
-#define VXWORKS_RELAX_LINK_SPEC "%{mrtp:--relax}"
-
-#undef LINK_SPEC
-#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_RELAX_LINK_SPEC
-
#undef STARTFILE_SPEC
#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+
#undef ENDFILE_SPEC
#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
/* There is no default multilib. */
#undef MULTILIB_DEFAULTS
-#undef TARGET_DEFAULT
-#define TARGET_DEFAULT (MASK_EABI | MASK_STRICT_ALIGN)
+/* No _mcount profiling on VxWorks. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO)
-#undef PROCESSOR_DEFAULT
-#define PROCESSOR_DEFAULT PROCESSOR_PPC604
+/* Initialize library function table. */
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS rs6000_vxworks_init_libfuncs
/* Nor sdata, for kernel mode. We use this in
SUBSUBTARGET_INITIALIZE_OPTIONS, after rs6000_rtp has been initialized. */
#undef SDATA_DEFAULT_SIZE
#define SDATA_DEFAULT_SIZE (TARGET_VXWORKS_RTP ? 8 : 0)
-/* Enforce 16-byte alignment for the stack pointer, to permit general
- compliance with e.g. Altivec instructions requirements. Make sure
- this isn't overruled by the EABI constraints. */
+#undef SUB3TARGET_OVERRIDE_OPTIONS
+#define SUB3TARGET_OVERRIDE_OPTIONS \
+ do { \
+ if (!global_options_set.x_g_switch_value) \
+ g_switch_value = SDATA_DEFAULT_SIZE; \
+ VXWORKS_OVERRIDE_OPTIONS; \
+ } while (0)
-#undef STACK_BOUNDARY
-#define STACK_BOUNDARY (16*BITS_PER_UNIT)
+/* The stack pointer need not be moved while checking the stack. */
+#undef STACK_CHECK_MOVING_SP
-#undef PREFERRED_STACK_BOUNDARY
-#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY
+/* Define this to be nonzero if static stack checking is supported. */
+#define STACK_CHECK_STATIC_BUILTIN 1
-#undef ABI_STACK_BOUNDARY
+/* Room needed to allow exception propagation, based on what experiments
+ and low-level observations have taught us. */
+#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024)
+
+/* Leverage linker relaxation for RTPs. This helps 32-bit programs
+ referring to kernel services too far away for short calls; it is more
+ precise than -mlongcall and can be overridden with -Wl,--no-relax. */
+#define VXWORKS_RELAX_LINK_SPEC "%{mrtp:--relax}"
+
+/*-------------------------------------------------------------*/
+/* Pre-VxWorks7 configuration. */
+/*-------------------------------------------------------------*/
+
+#if !TARGET_VXWORKS7
#undef RS6000_STARTING_FRAME_OFFSET
#define RS6000_STARTING_FRAME_OFFSET \
@@ -146,21 +202,79 @@ VXWORKS_ADDITIONAL_CPP_SPEC
RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \
+ STACK_POINTER_OFFSET, 16)
-#undef SUBSUBTARGET_OVERRIDE_OPTIONS
-#define SUBSUBTARGET_OVERRIDE_OPTIONS \
- do { \
- if (!global_options_set.x_g_switch_value) \
- g_switch_value = SDATA_DEFAULT_SIZE; \
- VXWORKS_OVERRIDE_OPTIONS; \
- } while (0)
+/* Enforce 16-byte alignment for the stack pointer, to permit general
+ compliance with e.g. Altivec instructions requirements. Make sure
+ this isn't overruled by the EABI constraints. */
-/* No _mcount profiling on VxWorks. */
-#undef FUNCTION_PROFILER
-#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO)
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY (16*BITS_PER_UNIT)
-/* Define this to be nonzero if static stack checking is supported. */
-#define STACK_CHECK_STATIC_BUILTIN 1
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY
+
+#undef ABI_STACK_BOUNDARY
+
+#undef STARTFILE_PREFIX_SPEC
+#define STARTFILE_PREFIX_SPEC \
+ "%{mrtp:%{!shared:%:getenv(WIND_BASE /target/lib/usr/lib/ppc/PPC32/common)}}"
+
+/* For aggregate passing, use the same consistent ABI as Linux.  */
+#define AGGREGATE_PADDING_FIXED 0
+#define AGGREGATES_PAD_UPWARD_ALWAYS 0
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+"%(asm_cpu) \
+ %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \
+ %{mrelocatable} %{mrelocatable-lib} %{" FPIC_SPEC ":-K PIC} -mbig"
+
+#undef CC1_SPEC
+#define CC1_SPEC VXWORKS_CC1_SPEC " \
+ %{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \
+ %{mlittle|mlittle-endian:-mstrict-align}"
+
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_RELAX_LINK_SPEC
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_EABI | MASK_STRICT_ALIGN)
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC604
+
+/* Only big-endian PPC is supported by VxWorks.  */
+#undef BYTES_BIG_ENDIAN
+#define BYTES_BIG_ENDIAN 1
+
+#undef WORDS_BIG_ENDIAN
+#define WORDS_BIG_ENDIAN 1
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS /* none needed */
+
+#else /* TARGET_VXWORKS7 */
+
+/*-------------------------------------------------------------*/
+/* Post-VxWorks7 (SR600) configuration. */
+/*-------------------------------------------------------------*/
+
+/* VxWorks does not use local symbols for the function entry point. */
+#undef DOT_SYMBOLS
+#define DOT_SYMBOLS 0
+
+#undef LINK_OS_VXWORKS_SPEC
+#define LINK_OS_VXWORKS_SPEC \
+ " %{!mrtp:-r} %{mrtp:-q -static} %{!Xbind-lazy:-z now}"
+
+#undef LINK_OS_EXTRA_SPEC32
+#define LINK_OS_EXTRA_SPEC32 LINK_OS_VXWORKS_SPEC " " VXWORKS_RELAX_LINK_SPEC
+
+#undef LINK_OS_EXTRA_SPEC64
+#define LINK_OS_EXTRA_SPEC64 LINK_OS_VXWORKS_SPEC
+
+/* linux64.h enables this; it is not supported on VxWorks.  */
+#undef TARGET_FLOAT128_ENABLE_TYPE
+#define TARGET_FLOAT128_ENABLE_TYPE 0
+
+#endif /* TARGET_VXWORKS7 */
-/* This platform supports the probing method of stack checking (RTP mode).
- 8K is reserved in the stack to propagate exceptions in case of overflow. */
-#define STACK_CHECK_PROTECT 8192
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 6f1bc07..029f728 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
+extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index c762840..f9b27f9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2467,6 +2467,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
rtx elt;
bool b;
+ /* Handle floats by bitcasting them to ints. */
+ op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
+
gcc_assert (!!start == !!end);
if (!const_vec_duplicate_p (op, &elt)
|| !CONST_INT_P (elt))
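
The gen_lowpart call above reinterprets a float vector constant as an
integer vector so that the bitmask test can look at raw bits.  A
host-side C sketch of the same idea (the helper name is invented, and
the wraparound masks the backend also accepts are ignored here):

    #include <stdint.h>
    #include <string.h>

    static int
    is_contiguous_mask_f32 (float f)
    {
      uint32_t u;
      memcpy (&u, &f, sizeof u);     /* bitcast, like gen_lowpart */
      /* A non-zero U is one block of consecutive 1s iff adding its
         lowest set bit clears the whole block.  */
      return u != 0 && (((u & -u) + u) & u) == 0;
    }
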
@@ -5952,6 +5955,7 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
rtx temp;
rtx len = gen_reg_rtx (QImode);
rtx cond;
+ rtx mem;
s390_load_address (str_addr_base_reg, XEXP (string, 0));
emit_move_insn (str_idx_reg, const0_rtx);
@@ -5993,10 +5997,10 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
LABEL_NUSES (loop_start_label) = 1;
/* Load 16 bytes of the string into VR. */
- emit_move_insn (str_reg,
- gen_rtx_MEM (V16QImode,
- gen_rtx_PLUS (Pmode, str_idx_reg,
- str_addr_base_reg)));
+ mem = gen_rtx_MEM (V16QImode,
+ gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
+ set_mem_align (mem, 128);
+ emit_move_insn (str_reg, mem);
if (into_loop_label != NULL_RTX)
{
emit_label (into_loop_label);
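
set_mem_align expects bits, so the 128 above records that each load in
the loop is 16-byte aligned.  A conceptual C sketch of the scan this
function emits, assuming a base the emitted code has already aligned
(invented helper name, not the actual RTL):

    #include <stddef.h>

    static size_t
    strlen_by_16 (const char *s)
    {
      for (size_t idx = 0; ; idx += 16)  /* one 16-byte vector load */
        for (size_t i = 0; i < 16; i++)  /* one vector instruction */
          if (s[idx + i] == '\0')
            return idx + i;
    }
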
@@ -6863,15 +6867,16 @@ s390_expand_vec_init (rtx target, rtx vals)
}
/* Use vector gen mask or vector gen byte mask if possible. */
- if (all_same && all_const_int
- && (XVECEXP (vals, 0, 0) == const0_rtx
- || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
- NULL, NULL)
- || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+ if (all_same && all_const_int)
{
- emit_insn (gen_rtx_SET (target,
- gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
- return;
+ rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ if (XVECEXP (vals, 0, 0) == const0_rtx
+ || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
+ || s390_bytemask_vector_p (vec, NULL))
+ {
+ emit_insn (gen_rtx_SET (target, vec));
+ return;
+ }
}
/* Use vector replicate instructions. vlrep/vrepi/vrep */
@@ -6949,6 +6954,30 @@ s390_expand_vec_init (rtx target, rtx vals)
}
}
+/* Emit a vector constant that contains 1s in each element's sign bit position
+ and 0s in other positions. MODE is the desired constant's mode. */
+extern rtx
+s390_build_signbit_mask (machine_mode mode)
+{
+ /* Generate the integral element mask value. */
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
+ wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
+
+ /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
+ value to the desired mode. */
+ machine_mode int_mode = related_int_vector_mode (mode).require ();
+ rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
+ mask = gen_lowpart (inner_mode, mask);
+
+  /* Build the vector mask rtx by duplicating the element mask rtx
+     across all elements.  */
+ int nunits = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (nunits);
+ for (int i = 0; i < nunits; i++)
+ RTVEC_ELT (v, i) = mask;
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
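
For concreteness, a minimal host-side sketch (plain C, not GCC's
wide_int machinery; the function name is invented) of the per-element
value built above, for 32-bit elements:

    #include <stdint.h>

    static uint32_t
    signbit_mask_32 (void)
    {
      /* wi::set_bit_in_zero (31, 32) yields the same bit pattern.  */
      return (uint32_t) 1 << 31;       /* 0x80000000 */
    }
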
+
/* Structure to hold the initial parameters for a compare_and_swap operation
in HImode and QImode. */
@@ -16082,12 +16111,13 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
fenv_var = __builtin_s390_efpc ();
__builtin_s390_sfpc (fenv_var & mask) */
- tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
- tree new_fpc =
- build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
- build_int_cst (unsigned_type_node,
- ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
- FPC_EXCEPTION_MASK)));
+ tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
+ NULL_TREE, NULL_TREE);
+ tree new_fpc
+ = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
+ build_int_cst (unsigned_type_node,
+ ~(FPC_DXC_MASK | FPC_FLAGS_MASK
+ | FPC_EXCEPTION_MASK)));
tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
*hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
@@ -16106,8 +16136,8 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
__atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
old_fpc = create_tmp_var_raw (unsigned_type_node);
- tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
- old_fpc, call_efpc);
+ tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
+ NULL_TREE, NULL_TREE);
set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4c3e540..18edea1 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1391,23 +1391,55 @@
; (TF|DF|SF|TD|DD|SD) instructions
-; FIXME: load and test instructions turn SNaN into QNaN what is not
-; acceptable if the target will be used afterwards. On the other hand
-; they are quite convenient for implementing comparisons with 0.0. So
-; try to enable them via splitter/peephole if the value isn't needed anymore.
-; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
+; Load and test instructions turn a signaling NaN into a quiet NaN, so they
+; may only be used if the target register is dead afterwards or if fast math
+; is enabled.  The former is done via a peephole optimization.  Note that
+; load and test instructions may only be used for (in)equality comparisons:
+; relational comparisons must treat a quiet NaN like a signaling NaN, which
+; load and test instructions do not do.  For fast math, insn
+; "cmp<mode>_ccs_0_fastmath" applies.
+; See testcases load-and-test-fp-{1,2}.c
+
+(define_peephole2
+ [(set (match_operand:FP 0 "register_operand")
+ (match_operand:FP 1 "const0_operand"))
+ (set (reg:CCZ CC_REGNUM)
+ (compare:CCZ (match_operand:FP 2 "register_operand")
+ (match_operand:FP 3 "register_operand")))]
+ "TARGET_HARD_FLOAT
+ && FP_REG_P (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && peep2_reg_dead_p (2, operands[0])
+ && peep2_reg_dead_p (2, operands[2])"
+ [(parallel
+ [(set (reg:CCZ CC_REGNUM)
+ (compare:CCZ (match_dup 2) (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
-(define_insn "*cmp<mode>_ccs_0"
- [(set (reg CC_REGNUM)
- (compare (match_operand:FP 0 "register_operand" "f")
- (match_operand:FP 1 "const0_operand" "")))
- (clobber (match_operand:FP 2 "register_operand" "=0"))]
- "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
+(define_insn "*cmp<mode>_ccz_0"
+ [(set (reg:CCZ CC_REGNUM)
+ (compare:CCZ (match_operand:FP 0 "register_operand" "f")
+ (match_operand:FP 1 "const0_operand")))
+ (clobber (match_operand:FP 2 "register_operand" "=0"))]
+ "TARGET_HARD_FLOAT"
"lt<xde><bt>r\t%0,%0"
[(set_attr "op_type" "RRE")
(set_attr "type" "fsimp<mode>")])
+(define_insn "*cmp<mode>_ccs_0_fastmath"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:FP 0 "register_operand" "f")
+ (match_operand:FP 1 "const0_operand")))]
+ "s390_match_ccmode (insn, CCSmode)
+ && TARGET_HARD_FLOAT
+ && !flag_trapping_math
+ && !flag_signaling_nans"
+ "lt<xde><bt>r\t%0,%0"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
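
A small C illustration of the distinction the comment draws (both
functions are invented examples): under default, non-fast math an
equality test against 0.0 is a load-and-test candidate once the
peephole sees the input register die, while a relational test needs a
genuine compare instruction.

    /* May become a load and test via the peephole when X's register
       is dead afterwards.  */
    int is_zero (double x)     { return x == 0.0; }

    /* Must treat a quiet NaN like a signaling NaN, which load and
       test does not, so a real compare is required.  */
    int is_negative (double x) { return x < 0.0; }
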
; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
(define_insn "*cmp<mode>_ccs"
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 2573b7d..3c01cd1 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1425,35 +1425,45 @@
; Vector copysign, implement using vector select
(define_expand "copysign<mode>3"
- [(set (match_operand:VFT 0 "register_operand" "")
- (if_then_else:VFT
- (eq (match_dup 3)
- (match_dup 4))
- (match_operand:VFT 1 "register_operand" "")
- (match_operand:VFT 2 "register_operand" "")))]
+ [(set (match_operand:VFT 0 "register_operand" "")
+ (ior:VFT
+ (and:VFT (match_operand:VFT 2 "register_operand" "")
+ (match_dup 3))
+ (and:VFT (not:VFT (match_dup 3))
+ (match_operand:VFT 1 "register_operand" ""))))]
"TARGET_VX"
{
- int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
- int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode));
- wide_int mask_val = wi::shwi (1l << (sz - 1), prec);
-
- rtx mask = gen_reg_rtx (<tointvec>mode);
-
- int nunits = GET_MODE_NUNITS (<tointvec>mode);
- rtvec v = rtvec_alloc (nunits);
- for (int i = 0; i < nunits; i++)
- RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
-
- mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v);
- operands[3] = force_reg (<tointvec>mode, mask);
- operands[4] = CONST0_RTX (<tointvec>mode);
+ rtx mask = s390_build_signbit_mask (<MODE>mode);
+ operands[3] = force_reg (<MODE>mode, mask);
})
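
The rewritten pattern computes copysign as a bit-select: sign bits come
from operand 2 through the mask, all remaining bits from operand 1.  A
scalar C analogue for IEEE binary32 (a sketch; the helper name is
invented):

    #include <stdint.h>
    #include <string.h>

    static float
    copysignf_bits (float a, float b)
    {
      uint32_t ua, ub, m = UINT32_C (1) << 31;  /* sign-bit mask */
      memcpy (&ua, &a, sizeof ua);
      memcpy (&ub, &b, sizeof ub);
      ua = (ub & m) | (ua & ~m);   /* sign from B, magnitude from A */
      memcpy (&a, &ua, sizeof a);
      return a;
    }
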
;;
;; Integer compares
;;
-(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode>_nocc"
+(define_expand "vec_cmp<VI_HW:mode><VI_HW:mode>"
+ [(set (match_operand:VI_HW 0 "register_operand" "")
+ (match_operator:VI_HW 1 ""
+ [(match_operand:VI_HW 2 "register_operand" "")
+ (match_operand:VI_HW 3 "register_operand" "")]))]
+ "TARGET_VX"
+{
+  s390_expand_vec_compare (operands[0], GET_CODE (operands[1]),
+                           operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "vec_cmpu<VI_HW:mode><VI_HW:mode>"
+ [(set (match_operand:VI_HW 0 "register_operand" "")
+ (match_operator:VI_HW 1 ""
+ [(match_operand:VI_HW 2 "register_operand" "")
+ (match_operand:VI_HW 3 "register_operand" "")]))]
+ "TARGET_VX"
+{
+  s390_expand_vec_compare (operands[0], GET_CODE (operands[1]),
+                           operands[2], operands[3]);
+ DONE;
+})
+
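
Both expanders forward to s390_expand_vec_compare because the rtx code
taken from operands[1] (e.g. GT vs. GTU) already encodes the
signedness.  Conceptually, an element-wise unsigned GTU on bytes yields
an all-ones mask per true lane, as in this invented C sketch:

    #include <stdint.h>

    static void
    vec_cmpu_gtu (uint8_t res[16], const uint8_t a[16], const uint8_t b[16])
    {
      for (int i = 0; i < 16; i++)
        res[i] = a[i] > b[i] ? 0xFF : 0x00;  /* all-ones when true */
    }
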
+(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode><VI:mode>_nocc"
[(set (match_operand:VI 2 "register_operand" "=v")
(VICMP_HW_OP:VI (match_operand:VI 0 "register_operand" "v")
(match_operand:VI 1 "register_operand" "v")))]
diff --git a/gcc/config/t-vxworks b/gcc/config/t-vxworks
index fd1fbfd..221f53c 100644
--- a/gcc/config/t-vxworks
+++ b/gcc/config/t-vxworks
@@ -59,7 +59,7 @@ stmp-int-hdrs: subst-glimits.h
subst-%.h:
cp -p $(srcdir)/$*.h orig-$*.h
ID=$$(echo $(BASEVER_c) | sed -e 's/\./_/g'); \
- sed -e "s/_LIMITS_H__/_LIMITS_H_$${ID}_/" < $(srcdir)/$*.h > $@
+ sed -e "s/_LIMITS_H__/_LIMITS_H__$${ID}_/" < $(srcdir)/$*.h > $@
cp $@ $(srcdir)/$*.h
# Then arrange to restore the original versions after the standard
diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h
index f4a1ffd..9cd7b3d 100644
--- a/gcc/config/vx-common.h
+++ b/gcc/config/vx-common.h
@@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see
/* Most of these will probably be overridden by subsequent headers. We
undefine them here just in case, and define VXWORKS_ versions of each,
to be used in port-specific vxworks.h. */
-#undef LIB_SPEC
-#undef LINK_SPEC
#undef LIBGCC_SPEC
#define LIBGCC_SPEC VXWORKS_LIBGCC_SPEC
#undef STARTFILE_SPEC
diff --git a/gcc/config/vxworks.c b/gcc/config/vxworks.c
index 970d504..ca0f5de 100644
--- a/gcc/config/vxworks.c
+++ b/gcc/config/vxworks.c
@@ -154,8 +154,10 @@ vxworks_override_options (void)
targetm.have_ctors_dtors =
TARGET_VXWORKS_HAVE_CTORS_DTORS || HAVE_INITFINI_ARRAY_SUPPORT;
- /* PIC is only supported for RTPs. */
- if (flag_pic && !TARGET_VXWORKS_RTP)
+ /* PIC is only supported for RTPs.  flag_pic might be < 0 here, in
+ contexts where the corresponding switches are not processed,
+ e.g. from --help. We are not generating code in such cases. */
+ if (flag_pic > 0 && !TARGET_VXWORKS_RTP)
error ("PIC is only supported for RTPs");
/* VxWorks comes with non-gdb debuggers which only support strict
diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
index e50260b0..b7e5970 100644
--- a/gcc/config/vxworks.h
+++ b/gcc/config/vxworks.h
@@ -70,6 +70,12 @@ along with GCC; see the file COPYING3. If not see
#endif
+/* Our ports rely on gnu-user.h, which #defines _POSIX_SOURCE for
+ C++ by default. VxWorks doesn't provide 100% of what this implies
+ (e.g. ::mkstemp), so, arrange to prevent that by falling back to
+ the default CPP spec for C++ as well. */
+#undef CPLUSPLUS_CPP_SPEC
+
/* For VxWorks static rtps, the system provides libc_internal.a, a superset of
libgcc.a that we need to use e.g. to satisfy references to __init and
__fini. We still want our libgcc to prevail for symbols it would provide
@@ -84,7 +90,7 @@ along with GCC; see the file COPYING3. If not see
#define VXWORKS_SYSCALL_LIBS_RTP
#if TARGET_VXWORKS7
-#define VXWORKS_NET_LIBS_RTP "-lnet"
+#define VXWORKS_NET_LIBS_RTP "-l%:if-exists-then-else(%:getenv(VSB_DIR /usr/h/public/rtnetStackLib.h) rtnet net)"
#else
#define VXWORKS_NET_LIBS_RTP "-lnet -ldsi"
#endif
@@ -152,8 +158,7 @@ along with GCC; see the file COPYING3. If not see
/* Setup the crtstuff begin/end we might need for dwarf EH registration. */
#if !defined(CONFIG_SJLJ_EXCEPTIONS) && DWARF2_UNWIND_INFO
-#define VX_CRTBEGIN_SPEC \
- "%{!mrtp:vx_crtbegin-kernel.o%s} %{mrtp:vx_crtbegin-rtp.o%s}"
+#define VX_CRTBEGIN_SPEC "vx_crtbegin.o%s"
#define VX_CRTEND_SPEC "-l:vx_crtend.o"
#else
#define VX_CRTBEGIN_SPEC ""
diff --git a/gcc/config/vxworks/_vxworks-versions.h b/gcc/config/vxworks/_vxworks-versions.h
index 0aaf547..15e8bfe 100644
--- a/gcc/config/vxworks/_vxworks-versions.h
+++ b/gcc/config/vxworks/_vxworks-versions.h
@@ -22,17 +22,29 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#ifndef _VXWORKS_VERSIONS_H
#define _VXWORKS_VERSIONS_H 1
-/* All we need is access to the bare _WRS_VXWORKS_MAJOR/MINOR macros
- exposed by version.h. Cheat a bit to make sure we don't drag additional
- header files, which can easily cause #include ordering nightmares. */
+/* All we need is access to the bare _WRS_VXWORKS_MAJOR/MINOR macros,
+   exposed by version.h or already provided by other means (e.g. via a
+   self spec).  When resorting to system headers, cheat a bit to make
+   sure we don't drag in additional header files, which can easily
+   cause #include ordering nightmares. */
+#if !defined(_WRS_VXWORKS_MAJOR)
#pragma push_macro("_WRS_KERNEL")
#undef _WRS_KERNEL
#include <version.h>
#pragma pop_macro("_WRS_KERNEL")
+#endif
+
+/* A lot depends on MAJOR, so we really need to make sure it is
+   defined.  MINOR is less critical, and many environments only define
+   it when it is really meaningful (e.g. 6.4 through 6.9). */
#if !defined(_WRS_VXWORKS_MAJOR)
-#error "VxWorks version macros needed but not defined"
+#error "_WRS_VXWORKS_MAJOR undefined"
+#endif
+
+#if !defined(_WRS_VXWORKS_MINOR)
+#define _WRS_VXWORKS_MINOR 0
#endif
#define _VXWORKS_MAJOR_GT(MAJOR) (_WRS_VXWORKS_MAJOR > (MAJOR))