diff options
author | Srinath Parvathaneni <srinath.parvathaneni@arm.com> | 2020-03-18 17:16:21 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2020-03-18 17:16:21 +0000 |
commit | 532e9e2402a62367b965b0901478d27570b6d3a2 (patch) | |
tree | 48058a1945321182060862c21f578f25cee7cbc3 | |
parent | f2170a379b0fcd79191b5363cddaf0cbc508fd2b (diff) | |
download | gcc-532e9e2402a62367b965b0901478d27570b6d3a2.zip gcc-532e9e2402a62367b965b0901478d27570b6d3a2.tar.gz gcc-532e9e2402a62367b965b0901478d27570b6d3a2.tar.bz2 |
[ARM][GCC][4/4x]: MVE intrinsics with quaternary operands.
This patch supports following MVE ACLE intrinsics with quaternary operands.
vabdq_m_f32, vabdq_m_f16, vaddq_m_f32, vaddq_m_f16, vaddq_m_n_f32, vaddq_m_n_f16, vandq_m_f32, vandq_m_f16, vbicq_m_f32, vbicq_m_f16, vbrsrq_m_n_f32, vbrsrq_m_n_f16, vcaddq_rot270_m_f32, vcaddq_rot270_m_f16, vcaddq_rot90_m_f32, vcaddq_rot90_m_f16, vcmlaq_m_f32, vcmlaq_m_f16, vcmlaq_rot180_m_f32, vcmlaq_rot180_m_f16, vcmlaq_rot270_m_f32, vcmlaq_rot270_m_f16, vcmlaq_rot90_m_f32, vcmlaq_rot90_m_f16, vcmulq_m_f32, vcmulq_m_f16, vcmulq_rot180_m_f32, vcmulq_rot180_m_f16, vcmulq_rot270_m_f32, vcmulq_rot270_m_f16, vcmulq_rot90_m_f32, vcmulq_rot90_m_f16, vcvtq_m_n_s32_f32, vcvtq_m_n_s16_f16, vcvtq_m_n_u32_f32, vcvtq_m_n_u16_f16, veorq_m_f32, veorq_m_f16, vfmaq_m_f32, vfmaq_m_f16, vfmaq_m_n_f32, vfmaq_m_n_f16, vfmasq_m_n_f32, vfmasq_m_n_f16, vfmsq_m_f32, vfmsq_m_f16, vmaxnmq_m_f32, vmaxnmq_m_f16, vminnmq_m_f32, vminnmq_m_f16, vmulq_m_f32, vmulq_m_f16, vmulq_m_n_f32, vmulq_m_n_f16, vornq_m_f32, vornq_m_f16, vorrq_m_f32, vorrq_m_f16, vsubq_m_f32, vsubq_m_f16, vsubq_m_n_f32, vsubq_m_n_f16.
Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details.
[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics
2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* config/arm/arm_mve.h (vabdq_m_f32): Define macro.
(vabdq_m_f16): Likewise.
(vaddq_m_f32): Likewise.
(vaddq_m_f16): Likewise.
(vaddq_m_n_f32): Likewise.
(vaddq_m_n_f16): Likewise.
(vandq_m_f32): Likewise.
(vandq_m_f16): Likewise.
(vbicq_m_f32): Likewise.
(vbicq_m_f16): Likewise.
(vbrsrq_m_n_f32): Likewise.
(vbrsrq_m_n_f16): Likewise.
(vcaddq_rot270_m_f32): Likewise.
(vcaddq_rot270_m_f16): Likewise.
(vcaddq_rot90_m_f32): Likewise.
(vcaddq_rot90_m_f16): Likewise.
(vcmlaq_m_f32): Likewise.
(vcmlaq_m_f16): Likewise.
(vcmlaq_rot180_m_f32): Likewise.
(vcmlaq_rot180_m_f16): Likewise.
(vcmlaq_rot270_m_f32): Likewise.
(vcmlaq_rot270_m_f16): Likewise.
(vcmlaq_rot90_m_f32): Likewise.
(vcmlaq_rot90_m_f16): Likewise.
(vcmulq_m_f32): Likewise.
(vcmulq_m_f16): Likewise.
(vcmulq_rot180_m_f32): Likewise.
(vcmulq_rot180_m_f16): Likewise.
(vcmulq_rot270_m_f32): Likewise.
(vcmulq_rot270_m_f16): Likewise.
(vcmulq_rot90_m_f32): Likewise.
(vcmulq_rot90_m_f16): Likewise.
(vcvtq_m_n_s32_f32): Likewise.
(vcvtq_m_n_s16_f16): Likewise.
(vcvtq_m_n_u32_f32): Likewise.
(vcvtq_m_n_u16_f16): Likewise.
(veorq_m_f32): Likewise.
(veorq_m_f16): Likewise.
(vfmaq_m_f32): Likewise.
(vfmaq_m_f16): Likewise.
(vfmaq_m_n_f32): Likewise.
(vfmaq_m_n_f16): Likewise.
(vfmasq_m_n_f32): Likewise.
(vfmasq_m_n_f16): Likewise.
(vfmsq_m_f32): Likewise.
(vfmsq_m_f16): Likewise.
(vmaxnmq_m_f32): Likewise.
(vmaxnmq_m_f16): Likewise.
(vminnmq_m_f32): Likewise.
(vminnmq_m_f16): Likewise.
(vmulq_m_f32): Likewise.
(vmulq_m_f16): Likewise.
(vmulq_m_n_f32): Likewise.
(vmulq_m_n_f16): Likewise.
(vornq_m_f32): Likewise.
(vornq_m_f16): Likewise.
(vorrq_m_f32): Likewise.
(vorrq_m_f16): Likewise.
(vsubq_m_f32): Likewise.
(vsubq_m_f16): Likewise.
(vsubq_m_n_f32): Likewise.
(vsubq_m_n_f16): Likewise.
(__attribute__): Likewise.
(__arm_vabdq_m_f32): Likewise.
(__arm_vabdq_m_f16): Likewise.
(__arm_vaddq_m_f32): Likewise.
(__arm_vaddq_m_f16): Likewise.
(__arm_vaddq_m_n_f32): Likewise.
(__arm_vaddq_m_n_f16): Likewise.
(__arm_vandq_m_f32): Likewise.
(__arm_vandq_m_f16): Likewise.
(__arm_vbicq_m_f32): Likewise.
(__arm_vbicq_m_f16): Likewise.
(__arm_vbrsrq_m_n_f32): Likewise.
(__arm_vbrsrq_m_n_f16): Likewise.
(__arm_vcaddq_rot270_m_f32): Likewise.
(__arm_vcaddq_rot270_m_f16): Likewise.
(__arm_vcaddq_rot90_m_f32): Likewise.
(__arm_vcaddq_rot90_m_f16): Likewise.
(__arm_vcmlaq_m_f32): Likewise.
(__arm_vcmlaq_m_f16): Likewise.
(__arm_vcmlaq_rot180_m_f32): Likewise.
(__arm_vcmlaq_rot180_m_f16): Likewise.
(__arm_vcmlaq_rot270_m_f32): Likewise.
(__arm_vcmlaq_rot270_m_f16): Likewise.
(__arm_vcmlaq_rot90_m_f32): Likewise.
(__arm_vcmlaq_rot90_m_f16): Likewise.
(__arm_vcmulq_m_f32): Likewise.
(__arm_vcmulq_m_f16): Likewise.
(__arm_vcmulq_rot180_m_f32): Define intrinsic.
(__arm_vcmulq_rot180_m_f16): Likewise.
(__arm_vcmulq_rot270_m_f32): Likewise.
(__arm_vcmulq_rot270_m_f16): Likewise.
(__arm_vcmulq_rot90_m_f32): Likewise.
(__arm_vcmulq_rot90_m_f16): Likewise.
(__arm_vcvtq_m_n_s32_f32): Likewise.
(__arm_vcvtq_m_n_s16_f16): Likewise.
(__arm_vcvtq_m_n_u32_f32): Likewise.
(__arm_vcvtq_m_n_u16_f16): Likewise.
(__arm_veorq_m_f32): Likewise.
(__arm_veorq_m_f16): Likewise.
(__arm_vfmaq_m_f32): Likewise.
(__arm_vfmaq_m_f16): Likewise.
(__arm_vfmaq_m_n_f32): Likewise.
(__arm_vfmaq_m_n_f16): Likewise.
(__arm_vfmasq_m_n_f32): Likewise.
(__arm_vfmasq_m_n_f16): Likewise.
(__arm_vfmsq_m_f32): Likewise.
(__arm_vfmsq_m_f16): Likewise.
(__arm_vmaxnmq_m_f32): Likewise.
(__arm_vmaxnmq_m_f16): Likewise.
(__arm_vminnmq_m_f32): Likewise.
(__arm_vminnmq_m_f16): Likewise.
(__arm_vmulq_m_f32): Likewise.
(__arm_vmulq_m_f16): Likewise.
(__arm_vmulq_m_n_f32): Likewise.
(__arm_vmulq_m_n_f16): Likewise.
(__arm_vornq_m_f32): Likewise.
(__arm_vornq_m_f16): Likewise.
(__arm_vorrq_m_f32): Likewise.
(__arm_vorrq_m_f16): Likewise.
(__arm_vsubq_m_f32): Likewise.
(__arm_vsubq_m_f16): Likewise.
(__arm_vsubq_m_n_f32): Likewise.
(__arm_vsubq_m_n_f16): Likewise.
(vabdq_m): Define polymorphic variant.
(vaddq_m): Likewise.
(vaddq_m_n): Likewise.
(vandq_m): Likewise.
(vbicq_m): Likewise.
(vbrsrq_m_n): Likewise.
(vcaddq_rot270_m): Likewise.
(vcaddq_rot90_m): Likewise.
(vcmlaq_m): Likewise.
(vcmlaq_rot180_m): Likewise.
(vcmlaq_rot270_m): Likewise.
(vcmlaq_rot90_m): Likewise.
(vcmulq_m): Likewise.
(vcmulq_rot180_m): Likewise.
(vcmulq_rot270_m): Likewise.
(vcmulq_rot90_m): Likewise.
(veorq_m): Likewise.
(vfmaq_m): Likewise.
(vfmaq_m_n): Likewise.
(vfmasq_m_n): Likewise.
(vfmsq_m): Likewise.
(vmaxnmq_m): Likewise.
(vminnmq_m): Likewise.
(vmulq_m): Likewise.
(vmulq_m_n): Likewise.
(vornq_m): Likewise.
(vsubq_m): Likewise.
(vsubq_m_n): Likewise.
(vorrq_m): Likewise.
* config/arm/arm_mve_builtins.def (QUADOP_NONE_NONE_NONE_IMM_UNONE): Use
builtin qualifier.
(QUADOP_NONE_NONE_NONE_NONE_UNONE): Likewise.
(QUADOP_UNONE_UNONE_NONE_IMM_UNONE): Likewise.
* config/arm/mve.md (mve_vabdq_m_f<mode>): Define RTL pattern.
(mve_vaddq_m_f<mode>): Likewise.
(mve_vaddq_m_n_f<mode>): Likewise.
(mve_vandq_m_f<mode>): Likewise.
(mve_vbicq_m_f<mode>): Likewise.
(mve_vbrsrq_m_n_f<mode>): Likewise.
(mve_vcaddq_rot270_m_f<mode>): Likewise.
(mve_vcaddq_rot90_m_f<mode>): Likewise.
(mve_vcmlaq_m_f<mode>): Likewise.
(mve_vcmlaq_rot180_m_f<mode>): Likewise.
(mve_vcmlaq_rot270_m_f<mode>): Likewise.
(mve_vcmlaq_rot90_m_f<mode>): Likewise.
(mve_vcmulq_m_f<mode>): Likewise.
(mve_vcmulq_rot180_m_f<mode>): Likewise.
(mve_vcmulq_rot270_m_f<mode>): Likewise.
(mve_vcmulq_rot90_m_f<mode>): Likewise.
(mve_veorq_m_f<mode>): Likewise.
(mve_vfmaq_m_f<mode>): Likewise.
(mve_vfmaq_m_n_f<mode>): Likewise.
(mve_vfmasq_m_n_f<mode>): Likewise.
(mve_vfmsq_m_f<mode>): Likewise.
(mve_vmaxnmq_m_f<mode>): Likewise.
(mve_vminnmq_m_f<mode>): Likewise.
(mve_vmulq_m_f<mode>): Likewise.
(mve_vmulq_m_n_f<mode>): Likewise.
(mve_vornq_m_f<mode>): Likewise.
(mve_vorrq_m_f<mode>): Likewise.
(mve_vsubq_m_f<mode>): Likewise.
(mve_vsubq_m_n_f<mode>): Likewise.
gcc/testsuite/ChangeLog:
2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
* gcc.target/arm/mve/intrinsics/vabdq_m_f16.c: New test.
* gcc.target/arm/mve/intrinsics/vabdq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vaddq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vaddq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vandq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vandq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vbicq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vbicq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/veorq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/veorq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmulq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmulq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vornq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vornq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vorrq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vorrq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsubq_m_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsubq_m_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c: Likewise.
67 files changed, 3364 insertions, 293 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8bb2b7e..3b1154b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,198 @@ Mihail Ionescu <mihail.ionescu@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com> + * config/arm/arm_mve.h (vabdq_m_f32): Define macro. + (vabdq_m_f16): Likewise. + (vaddq_m_f32): Likewise. + (vaddq_m_f16): Likewise. + (vaddq_m_n_f32): Likewise. + (vaddq_m_n_f16): Likewise. + (vandq_m_f32): Likewise. + (vandq_m_f16): Likewise. + (vbicq_m_f32): Likewise. + (vbicq_m_f16): Likewise. + (vbrsrq_m_n_f32): Likewise. + (vbrsrq_m_n_f16): Likewise. + (vcaddq_rot270_m_f32): Likewise. + (vcaddq_rot270_m_f16): Likewise. + (vcaddq_rot90_m_f32): Likewise. + (vcaddq_rot90_m_f16): Likewise. + (vcmlaq_m_f32): Likewise. + (vcmlaq_m_f16): Likewise. + (vcmlaq_rot180_m_f32): Likewise. + (vcmlaq_rot180_m_f16): Likewise. + (vcmlaq_rot270_m_f32): Likewise. + (vcmlaq_rot270_m_f16): Likewise. + (vcmlaq_rot90_m_f32): Likewise. + (vcmlaq_rot90_m_f16): Likewise. + (vcmulq_m_f32): Likewise. + (vcmulq_m_f16): Likewise. + (vcmulq_rot180_m_f32): Likewise. + (vcmulq_rot180_m_f16): Likewise. + (vcmulq_rot270_m_f32): Likewise. + (vcmulq_rot270_m_f16): Likewise. + (vcmulq_rot90_m_f32): Likewise. + (vcmulq_rot90_m_f16): Likewise. + (vcvtq_m_n_s32_f32): Likewise. + (vcvtq_m_n_s16_f16): Likewise. + (vcvtq_m_n_u32_f32): Likewise. + (vcvtq_m_n_u16_f16): Likewise. + (veorq_m_f32): Likewise. + (veorq_m_f16): Likewise. + (vfmaq_m_f32): Likewise. + (vfmaq_m_f16): Likewise. + (vfmaq_m_n_f32): Likewise. + (vfmaq_m_n_f16): Likewise. + (vfmasq_m_n_f32): Likewise. + (vfmasq_m_n_f16): Likewise. + (vfmsq_m_f32): Likewise. + (vfmsq_m_f16): Likewise. + (vmaxnmq_m_f32): Likewise. + (vmaxnmq_m_f16): Likewise. + (vminnmq_m_f32): Likewise. + (vminnmq_m_f16): Likewise. + (vmulq_m_f32): Likewise. + (vmulq_m_f16): Likewise. + (vmulq_m_n_f32): Likewise. + (vmulq_m_n_f16): Likewise. + (vornq_m_f32): Likewise. + (vornq_m_f16): Likewise. + (vorrq_m_f32): Likewise. + (vorrq_m_f16): Likewise. + (vsubq_m_f32): Likewise. + (vsubq_m_f16): Likewise. + (vsubq_m_n_f32): Likewise. + (vsubq_m_n_f16): Likewise. + (__attribute__): Likewise. + (__arm_vabdq_m_f32): Likewise. + (__arm_vabdq_m_f16): Likewise. + (__arm_vaddq_m_f32): Likewise. + (__arm_vaddq_m_f16): Likewise. + (__arm_vaddq_m_n_f32): Likewise. + (__arm_vaddq_m_n_f16): Likewise. + (__arm_vandq_m_f32): Likewise. + (__arm_vandq_m_f16): Likewise. + (__arm_vbicq_m_f32): Likewise. + (__arm_vbicq_m_f16): Likewise. + (__arm_vbrsrq_m_n_f32): Likewise. + (__arm_vbrsrq_m_n_f16): Likewise. + (__arm_vcaddq_rot270_m_f32): Likewise. + (__arm_vcaddq_rot270_m_f16): Likewise. + (__arm_vcaddq_rot90_m_f32): Likewise. + (__arm_vcaddq_rot90_m_f16): Likewise. + (__arm_vcmlaq_m_f32): Likewise. + (__arm_vcmlaq_m_f16): Likewise. + (__arm_vcmlaq_rot180_m_f32): Likewise. + (__arm_vcmlaq_rot180_m_f16): Likewise. + (__arm_vcmlaq_rot270_m_f32): Likewise. + (__arm_vcmlaq_rot270_m_f16): Likewise. + (__arm_vcmlaq_rot90_m_f32): Likewise. + (__arm_vcmlaq_rot90_m_f16): Likewise. + (__arm_vcmulq_m_f32): Likewise. + (__arm_vcmulq_m_f16): Likewise. + (__arm_vcmulq_rot180_m_f32): Define intrinsic. + (__arm_vcmulq_rot180_m_f16): Likewise. + (__arm_vcmulq_rot270_m_f32): Likewise. + (__arm_vcmulq_rot270_m_f16): Likewise. + (__arm_vcmulq_rot90_m_f32): Likewise. + (__arm_vcmulq_rot90_m_f16): Likewise. + (__arm_vcvtq_m_n_s32_f32): Likewise. + (__arm_vcvtq_m_n_s16_f16): Likewise. + (__arm_vcvtq_m_n_u32_f32): Likewise. + (__arm_vcvtq_m_n_u16_f16): Likewise. + (__arm_veorq_m_f32): Likewise. + (__arm_veorq_m_f16): Likewise. + (__arm_vfmaq_m_f32): Likewise. + (__arm_vfmaq_m_f16): Likewise. + (__arm_vfmaq_m_n_f32): Likewise. + (__arm_vfmaq_m_n_f16): Likewise. + (__arm_vfmasq_m_n_f32): Likewise. + (__arm_vfmasq_m_n_f16): Likewise. + (__arm_vfmsq_m_f32): Likewise. + (__arm_vfmsq_m_f16): Likewise. + (__arm_vmaxnmq_m_f32): Likewise. + (__arm_vmaxnmq_m_f16): Likewise. + (__arm_vminnmq_m_f32): Likewise. + (__arm_vminnmq_m_f16): Likewise. + (__arm_vmulq_m_f32): Likewise. + (__arm_vmulq_m_f16): Likewise. + (__arm_vmulq_m_n_f32): Likewise. + (__arm_vmulq_m_n_f16): Likewise. + (__arm_vornq_m_f32): Likewise. + (__arm_vornq_m_f16): Likewise. + (__arm_vorrq_m_f32): Likewise. + (__arm_vorrq_m_f16): Likewise. + (__arm_vsubq_m_f32): Likewise. + (__arm_vsubq_m_f16): Likewise. + (__arm_vsubq_m_n_f32): Likewise. + (__arm_vsubq_m_n_f16): Likewise. + (vabdq_m): Define polymorphic variant. + (vaddq_m): Likewise. + (vaddq_m_n): Likewise. + (vandq_m): Likewise. + (vbicq_m): Likewise. + (vbrsrq_m_n): Likewise. + (vcaddq_rot270_m): Likewise. + (vcaddq_rot90_m): Likewise. + (vcmlaq_m): Likewise. + (vcmlaq_rot180_m): Likewise. + (vcmlaq_rot270_m): Likewise. + (vcmlaq_rot90_m): Likewise. + (vcmulq_m): Likewise. + (vcmulq_rot180_m): Likewise. + (vcmulq_rot270_m): Likewise. + (vcmulq_rot90_m): Likewise. + (veorq_m): Likewise. + (vfmaq_m): Likewise. + (vfmaq_m_n): Likewise. + (vfmasq_m_n): Likewise. + (vfmsq_m): Likewise. + (vmaxnmq_m): Likewise. + (vminnmq_m): Likewise. + (vmulq_m): Likewise. + (vmulq_m_n): Likewise. + (vornq_m): Likewise. + (vsubq_m): Likewise. + (vsubq_m_n): Likewise. + (vorrq_m): Likewise. + * config/arm/arm_mve_builtins.def (QUADOP_NONE_NONE_NONE_IMM_UNONE): Use + builtin qualifier. + (QUADOP_NONE_NONE_NONE_NONE_UNONE): Likewise. + (QUADOP_UNONE_UNONE_NONE_IMM_UNONE): Likewise. + * config/arm/mve.md (mve_vabdq_m_f<mode>): Define RTL pattern. + (mve_vaddq_m_f<mode>): Likewise. + (mve_vaddq_m_n_f<mode>): Likewise. + (mve_vandq_m_f<mode>): Likewise. + (mve_vbicq_m_f<mode>): Likewise. + (mve_vbrsrq_m_n_f<mode>): Likewise. + (mve_vcaddq_rot270_m_f<mode>): Likewise. + (mve_vcaddq_rot90_m_f<mode>): Likewise. + (mve_vcmlaq_m_f<mode>): Likewise. + (mve_vcmlaq_rot180_m_f<mode>): Likewise. + (mve_vcmlaq_rot270_m_f<mode>): Likewise. + (mve_vcmlaq_rot90_m_f<mode>): Likewise. + (mve_vcmulq_m_f<mode>): Likewise. + (mve_vcmulq_rot180_m_f<mode>): Likewise. + (mve_vcmulq_rot270_m_f<mode>): Likewise. + (mve_vcmulq_rot90_m_f<mode>): Likewise. + (mve_veorq_m_f<mode>): Likewise. + (mve_vfmaq_m_f<mode>): Likewise. + (mve_vfmaq_m_n_f<mode>): Likewise. + (mve_vfmasq_m_n_f<mode>): Likewise. + (mve_vfmsq_m_f<mode>): Likewise. + (mve_vmaxnmq_m_f<mode>): Likewise. + (mve_vminnmq_m_f<mode>): Likewise. + (mve_vmulq_m_f<mode>): Likewise. + (mve_vmulq_m_n_f<mode>): Likewise. + (mve_vornq_m_f<mode>): Likewise. + (mve_vorrq_m_f<mode>): Likewise. + (mve_vsubq_m_f<mode>): Likewise. + (mve_vsubq_m_n_f<mode>): Likewise. + +2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com> + Mihail Ionescu <mihail.ionescu@arm.com> + Srinath Parvathaneni <srinath.parvathaneni@arm.com> + * config/arm/arm-protos.h (arm_mve_immediate_check): * config/arm/arm.c (arm_mve_immediate_check): Define fuction to check mode and interger value. diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 0662812..4f8135d 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -1640,6 +1640,68 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define vshrntq_m_n_s16(__a, __b, __imm, __p) __arm_vshrntq_m_n_s16(__a, __b, __imm, __p) #define vshrntq_m_n_u32(__a, __b, __imm, __p) __arm_vshrntq_m_n_u32(__a, __b, __imm, __p) #define vshrntq_m_n_u16(__a, __b, __imm, __p) __arm_vshrntq_m_n_u16(__a, __b, __imm, __p) +#define vabdq_m_f32(__inactive, __a, __b, __p) __arm_vabdq_m_f32(__inactive, __a, __b, __p) +#define vabdq_m_f16(__inactive, __a, __b, __p) __arm_vabdq_m_f16(__inactive, __a, __b, __p) +#define vaddq_m_f32(__inactive, __a, __b, __p) __arm_vaddq_m_f32(__inactive, __a, __b, __p) +#define vaddq_m_f16(__inactive, __a, __b, __p) __arm_vaddq_m_f16(__inactive, __a, __b, __p) +#define vaddq_m_n_f32(__inactive, __a, __b, __p) __arm_vaddq_m_n_f32(__inactive, __a, __b, __p) +#define vaddq_m_n_f16(__inactive, __a, __b, __p) __arm_vaddq_m_n_f16(__inactive, __a, __b, __p) +#define vandq_m_f32(__inactive, __a, __b, __p) __arm_vandq_m_f32(__inactive, __a, __b, __p) +#define vandq_m_f16(__inactive, __a, __b, __p) __arm_vandq_m_f16(__inactive, __a, __b, __p) +#define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p) +#define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p) +#define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p) +#define vbrsrq_m_n_f16(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f16(__inactive, __a, __b, __p) +#define vcaddq_rot270_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f32(__inactive, __a, __b, __p) +#define vcaddq_rot270_m_f16(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f16(__inactive, __a, __b, __p) +#define vcaddq_rot90_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m_f32(__inactive, __a, __b, __p) +#define vcaddq_rot90_m_f16(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m_f16(__inactive, __a, __b, __p) +#define vcmlaq_m_f32(__a, __b, __c, __p) __arm_vcmlaq_m_f32(__a, __b, __c, __p) +#define vcmlaq_m_f16(__a, __b, __c, __p) __arm_vcmlaq_m_f16(__a, __b, __c, __p) +#define vcmlaq_rot180_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f32(__a, __b, __c, __p) +#define vcmlaq_rot180_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f16(__a, __b, __c, __p) +#define vcmlaq_rot270_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f32(__a, __b, __c, __p) +#define vcmlaq_rot270_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f16(__a, __b, __c, __p) +#define vcmlaq_rot90_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f32(__a, __b, __c, __p) +#define vcmlaq_rot90_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f16(__a, __b, __c, __p) +#define vcmulq_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_m_f32(__inactive, __a, __b, __p) +#define vcmulq_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_m_f16(__inactive, __a, __b, __p) +#define vcmulq_rot180_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot180_m_f32(__inactive, __a, __b, __p) +#define vcmulq_rot180_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot180_m_f16(__inactive, __a, __b, __p) +#define vcmulq_rot270_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot270_m_f32(__inactive, __a, __b, __p) +#define vcmulq_rot270_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot270_m_f16(__inactive, __a, __b, __p) +#define vcmulq_rot90_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot90_m_f32(__inactive, __a, __b, __p) +#define vcmulq_rot90_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot90_m_f16(__inactive, __a, __b, __p) +#define vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) +#define vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) +#define vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) +#define vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p) +#define veorq_m_f32(__inactive, __a, __b, __p) __arm_veorq_m_f32(__inactive, __a, __b, __p) +#define veorq_m_f16(__inactive, __a, __b, __p) __arm_veorq_m_f16(__inactive, __a, __b, __p) +#define vfmaq_m_f32(__a, __b, __c, __p) __arm_vfmaq_m_f32(__a, __b, __c, __p) +#define vfmaq_m_f16(__a, __b, __c, __p) __arm_vfmaq_m_f16(__a, __b, __c, __p) +#define vfmaq_m_n_f32(__a, __b, __c, __p) __arm_vfmaq_m_n_f32(__a, __b, __c, __p) +#define vfmaq_m_n_f16(__a, __b, __c, __p) __arm_vfmaq_m_n_f16(__a, __b, __c, __p) +#define vfmasq_m_n_f32(__a, __b, __c, __p) __arm_vfmasq_m_n_f32(__a, __b, __c, __p) +#define vfmasq_m_n_f16(__a, __b, __c, __p) __arm_vfmasq_m_n_f16(__a, __b, __c, __p) +#define vfmsq_m_f32(__a, __b, __c, __p) __arm_vfmsq_m_f32(__a, __b, __c, __p) +#define vfmsq_m_f16(__a, __b, __c, __p) __arm_vfmsq_m_f16(__a, __b, __c, __p) +#define vmaxnmq_m_f32(__inactive, __a, __b, __p) __arm_vmaxnmq_m_f32(__inactive, __a, __b, __p) +#define vmaxnmq_m_f16(__inactive, __a, __b, __p) __arm_vmaxnmq_m_f16(__inactive, __a, __b, __p) +#define vminnmq_m_f32(__inactive, __a, __b, __p) __arm_vminnmq_m_f32(__inactive, __a, __b, __p) +#define vminnmq_m_f16(__inactive, __a, __b, __p) __arm_vminnmq_m_f16(__inactive, __a, __b, __p) +#define vmulq_m_f32(__inactive, __a, __b, __p) __arm_vmulq_m_f32(__inactive, __a, __b, __p) +#define vmulq_m_f16(__inactive, __a, __b, __p) __arm_vmulq_m_f16(__inactive, __a, __b, __p) +#define vmulq_m_n_f32(__inactive, __a, __b, __p) __arm_vmulq_m_n_f32(__inactive, __a, __b, __p) +#define vmulq_m_n_f16(__inactive, __a, __b, __p) __arm_vmulq_m_n_f16(__inactive, __a, __b, __p) +#define vornq_m_f32(__inactive, __a, __b, __p) __arm_vornq_m_f32(__inactive, __a, __b, __p) +#define vornq_m_f16(__inactive, __a, __b, __p) __arm_vornq_m_f16(__inactive, __a, __b, __p) +#define vorrq_m_f32(__inactive, __a, __b, __p) __arm_vorrq_m_f32(__inactive, __a, __b, __p) +#define vorrq_m_f16(__inactive, __a, __b, __p) __arm_vorrq_m_f16(__inactive, __a, __b, __p) +#define vsubq_m_f32(__inactive, __a, __b, __p) __arm_vsubq_m_f32(__inactive, __a, __b, __p) +#define vsubq_m_f16(__inactive, __a, __b, __p) __arm_vsubq_m_f16(__inactive, __a, __b, __p) +#define vsubq_m_n_f32(__inactive, __a, __b, __p) __arm_vsubq_m_n_f32(__inactive, __a, __b, __p) +#define vsubq_m_n_f16(__inactive, __a, __b, __p) __arm_vsubq_m_n_f16(__inactive, __a, __b, __p) #endif __extension__ extern __inline void @@ -12641,6 +12703,439 @@ __arm_vcvtq_m_n_f32_s32 (float32x4_t __inactive, int32x4_t __a, const int __imm6 return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__inactive, __a, __imm6, __p); } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vabdq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vabdq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vabdq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vabdq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vaddq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vaddq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vaddq_m_n_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vaddq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vaddq_m_n_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vandq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vandq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vandq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vandq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vbicq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbrsrq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, int32_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vbrsrq_m_n_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vbrsrq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, int32_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vbrsrq_m_n_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot270_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcaddq_rot270_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot270_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcaddq_rot270_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot90_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcaddq_rot90_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcaddq_rot90_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcaddq_rot90_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot180_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot180_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot180_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot180_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot270_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot270_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot270_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot270_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot90_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot90_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmlaq_rot90_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vcmlaq_rot90_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot180_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot180_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot180_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot180_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot270_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot270_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot270_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot270_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot90_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot90_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcmulq_rot90_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vcmulq_rot90_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtq_m_n_s32_f32 (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p) +{ + return __builtin_mve_vcvtq_m_n_from_f_sv4si (__inactive, __a, __imm6, __p); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtq_m_n_s16_f16 (int16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p) +{ + return __builtin_mve_vcvtq_m_n_from_f_sv8hi (__inactive, __a, __imm6, __p); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtq_m_n_u32_f32 (uint32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p) +{ + return __builtin_mve_vcvtq_m_n_from_f_uv4si (__inactive, __a, __imm6, __p); +} + +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtq_m_n_u16_f16 (uint16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p) +{ + return __builtin_mve_vcvtq_m_n_from_f_uv8hi (__inactive, __a, __imm6, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_veorq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_veorq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_veorq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_veorq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmaq_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmaq_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmaq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmaq_m_n_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmaq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmaq_m_n_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmasq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmasq_m_n_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmasq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmasq_m_n_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmsq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmsq_m_fv4sf (__a, __b, __c, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vfmsq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vfmsq_m_fv8hf (__a, __b, __c, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmaxnmq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmaxnmq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmaxnmq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vminnmq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vminnmq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vminnmq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmulq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmulq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmulq_m_n_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vmulq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vmulq_m_n_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vornq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vornq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vornq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vorrq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vorrq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vorrq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vsubq_m_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vsubq_m_fv8hf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vsubq_m_n_fv4sf (__inactive, __a, __b, __p); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vsubq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p) +{ + return __builtin_mve_vsubq_m_n_fv8hf (__inactive, __a, __b, __p); +} #endif enum { @@ -14277,6 +14772,10 @@ extern void *__ARM_undef; #define __arm_vcvtq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcvtq_m_n_f16_s16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcvtq_m_n_f32_s32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_n_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ @@ -14807,6 +15306,303 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) +#define vabdq_m(p0,p1,p2,p3) __arm_vabdq_m(p0,p1,p2,p3) +#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3) +#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + +#define vandq_m(p0,p1,p2,p3) __arm_vandq_m(p0,p1,p2,p3) +#define __arm_vandq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vandq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vandq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vandq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vandq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vandq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vandq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vandq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vandq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vbicq_m(p0,p1,p2,p3) __arm_vbicq_m(p0,p1,p2,p3) +#define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vbrsrq_m(p0,p1,p2,p3) __arm_vbrsrq_m(p0,p1,p2,p3) +#define __arm_vbrsrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbrsrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2, p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbrsrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbrsrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbrsrq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbrsrq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3));}) + +#define vcaddq_rot270_m(p0,p1,p2,p3) __arm_vcaddq_rot270_m(p0,p1,p2,p3) +#define __arm_vcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot270_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot270_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot270_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcaddq_rot90_m(p0,p1,p2,p3) __arm_vcaddq_rot90_m(p0,p1,p2,p3) +#define __arm_vcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot90_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot90_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot90_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmlaq_m(p0,p1,p2,p3) __arm_vcmlaq_m(p0,p1,p2,p3) +#define __arm_vcmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmlaq_rot180_m(p0,p1,p2,p3) __arm_vcmlaq_rot180_m(p0,p1,p2,p3) +#define __arm_vcmlaq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmlaq_rot270_m(p0,p1,p2,p3) __arm_vcmlaq_rot270_m(p0,p1,p2,p3) +#define __arm_vcmlaq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmlaq_rot90_m(p0,p1,p2,p3) __arm_vcmlaq_rot90_m(p0,p1,p2,p3) +#define __arm_vcmlaq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmulq_m(p0,p1,p2,p3) __arm_vcmulq_m(p0,p1,p2,p3) +#define __arm_vcmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmulq_rot180_m(p0,p1,p2,p3) __arm_vcmulq_rot180_m(p0,p1,p2,p3) +#define __arm_vcmulq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmulq_rot270_m(p0,p1,p2,p3) __arm_vcmulq_rot270_m(p0,p1,p2,p3) +#define __arm_vcmulq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vcmulq_rot90_m(p0,p1,p2,p3) __arm_vcmulq_rot90_m(p0,p1,p2,p3) +#define __arm_vcmulq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)] [__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_m_f16(__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_m_f32(__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define veorq_m(p0,p1,p2,p3) __arm_veorq_m(p0,p1,p2,p3) +#define __arm_veorq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_veorq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_veorq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_veorq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_veorq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_veorq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_veorq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vfmaq_m(p0,p1,p2,p3) __arm_vfmaq_m(p0,p1,p2,p3) +#define __arm_vfmaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + +#define vfmasq_m(p0,p1,p2,p3) __arm_vfmasq_m(p0,p1,p2,p3) +#define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + +#define vfmsq_m(p0,p1,p2,p3) __arm_vfmsq_m(p0,p1,p2,p3) +#define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmsq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmsq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vmaxnmq_m(p0,p1,p2,p3) __arm_vmaxnmq_m(p0,p1,p2,p3) +#define __arm_vmaxnmq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vminnmq_m(p0,p1,p2,p3) __arm_vminnmq_m(p0,p1,p2,p3) +#define __arm_vminnmq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3) +#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + +#define vornq_m(p0,p1,p2,p3) __arm_vornq_m(p0,p1,p2,p3) +#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + +#define vsubq_m(p0,p1,p2,p3) __arm_vsubq_m(p0,p1,p2,p3) +#define __arm_vsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + +#define vorrq_m(p0,p1,p2,p3) __arm_vorrq_m(p0,p1,p2,p3) +#define __arm_vorrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vorrq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vorrq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vorrq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vorrq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vorrq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vorrq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vorrq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vorrq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) + #else /* MVE Integer. */ #define vst4q(p0,p1) __arm_vst4q(p0,p1) @@ -16762,30 +17558,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) -#define vmaxq_m(p0,p1,p2,p3) __arm_vmaxq_m(p0,p1,p2,p3) -#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vminq_m(p0,p1,p2,p3) __arm_vminq_m(p0,p1,p2,p3) -#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - #define vmladavaq_p(p0,p1,p2,p3) __arm_vmladavaq_p(p0,p1,p2,p3) #define __arm_vmladavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -16798,66 +17570,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaq_p_u16 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) -#define vmlaq_m(p0,p1,p2,p3) __arm_vmlaq_m(p0,p1,p2,p3) -#define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlaq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlaq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlaq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlaq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlaq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlaq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) - -#define vmlasq_m(p0,p1,p2,p3) __arm_vmlasq_m(p0,p1,p2,p3) -#define __arm_vmlasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlasq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlasq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlasq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlasq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlasq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlasq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) - -#define vmulhq_m(p0,p1,p2,p3) __arm_vmulhq_m(p0,p1,p2,p3) -#define __arm_vmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulhq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulhq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulhq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vmullbq_int_m(p0,p1,p2,p3) __arm_vmullbq_int_m(p0,p1,p2,p3) -#define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vmulltq_int_m(p0,p1,p2,p3) __arm_vmulltq_int_m(p0,p1,p2,p3) -#define __arm_vmulltq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - #define vornq_m(p0,p1,p2,p3) __arm_vornq_m(p0,p1,p2,p3) #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -16882,14 +17594,43 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vorrq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vorrq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) -#define vqdmlahq_m(p0,p1,p2,p3) __arm_vqdmlahq_m(p0,p1,p2,p3) -#define __arm_vqdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ +#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3) +#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3) +#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#endif /* MVE Integer. */ #define vqrdmlahq_m(p0,p1,p2,p3) __arm_vqrdmlahq_m(p0,p1,p2,p3) #define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ @@ -17013,126 +17754,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) -#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3) -#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vhaddq_m(p0,p1,p2,p3) __arm_vhaddq_m(p0,p1,p2,p3) -#define __arm_vhaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vhcaddq_rot270_m(p0,p1,p2,p3) __arm_vhcaddq_rot270_m(p0,p1,p2,p3) -#define __arm_vhcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vhcaddq_rot90_m(p0,p1,p2,p3) __arm_vhcaddq_rot90_m(p0,p1,p2,p3) -#define __arm_vhcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vhsubq_m(p0,p1,p2,p3) __arm_vhsubq_m(p0,p1,p2,p3) -#define __arm_vhsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) - -#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3) -#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vqaddq_m(p0,p1,p2,p3) __arm_vqaddq_m(p0,p1,p2,p3) -#define __arm_vqaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) - -#define vqdmulhq_m(p0,p1,p2,p3) __arm_vqdmulhq_m(p0,p1,p2,p3) -#define __arm_vqdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - #define vqsubq_m(p0,p1,p2,p3) __arm_vqsubq_m(p0,p1,p2,p3) #define __arm_vqsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -17181,67 +17802,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) -#define vqrdmladhq_m(p0,p1,p2,p3) __arm_vqrdmladhq_m(p0,p1,p2,p3) -#define __arm_vqrdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vqrdmladhxq_m(p0,p1,p2,p3) __arm_vqrdmladhxq_m(p0,p1,p2,p3) -#define __arm_vqrdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vmlsdavaxq_p(p0,p1,p2,p3) __arm_vmlsdavaxq_p(p0,p1,p2,p3) -#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vmlsdavaq_p(p0,p1,p2,p3) __arm_vmlsdavaq_p(p0,p1,p2,p3) -#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vmladavaxq_p(p0,p1,p2,p3) __arm_vmladavaxq_p(p0,p1,p2,p3) -#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_p_s8 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ - int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_p_s16 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - -#define vmullbq_poly_m(p0,p1,p2,p3) __arm_vmullbq_poly_m(p0,p1,p2,p3) -#define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));}) - -#define vmulltq_poly_m(p0,p1,p2,p3) __arm_vmulltq_poly_m(p0,p1,p2,p3) -#define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));}) - #define vshllbq_m(p0,p1,p2,p3) __arm_vshllbq_m(p0,p1,p2,p3) #define __arm_vshllbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -17301,26 +17861,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));}) -#define vqdmullbq_m(p0,p1,p2,p3) __arm_vqdmullbq_m(p0,p1,p2,p3) -#define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) - -#define vqdmulltq_m(p0,p1,p2,p3) __arm_vqdmulltq_m(p0,p1,p2,p3) -#define __arm_vqdmulltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) - #define vqrshrnbq_m(p0,p1,p2,p3) __arm_vqrshrnbq_m(p0,p1,p2,p3) #define __arm_vqrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -17433,8 +17973,6 @@ extern void *__ARM_undef; #define vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p(p0,p1,p2,p3) #define __arm_vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p_s32(p0,p1,p2,p3) -#endif /* MVE Integer. */ - #define vqdmladhq_m(p0,p1,p2,p3) __arm_vqdmladhq_m(p0,p1,p2,p3) #define __arm_vqdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -17561,6 +18099,264 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) +#define vhaddq_m(p0,p1,p2,p3) __arm_vhaddq_m(p0,p1,p2,p3) +#define __arm_vhaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vhcaddq_rot270_m(p0,p1,p2,p3) __arm_vhcaddq_rot270_m(p0,p1,p2,p3) +#define __arm_vhcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vhcaddq_rot90_m(p0,p1,p2,p3) __arm_vhcaddq_rot90_m(p0,p1,p2,p3) +#define __arm_vhcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vhsubq_m(p0,p1,p2,p3) __arm_vhsubq_m(p0,p1,p2,p3) +#define __arm_vhsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) + +#define vmaxq_m(p0,p1,p2,p3) __arm_vmaxq_m(p0,p1,p2,p3) +#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vminq_m(p0,p1,p2,p3) __arm_vminq_m(p0,p1,p2,p3) +#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vmlaq_m(p0,p1,p2,p3) __arm_vmlaq_m(p0,p1,p2,p3) +#define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlaq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlaq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlaq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlaq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlaq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlaq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) + +#define vmlasq_m(p0,p1,p2,p3) __arm_vmlasq_m(p0,p1,p2,p3) +#define __arm_vmlasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlasq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlasq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlasq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlasq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlasq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlasq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));}) + +#define vmulhq_m(p0,p1,p2,p3) __arm_vmulhq_m(p0,p1,p2,p3) +#define __arm_vmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulhq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulhq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulhq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vmullbq_int_m(p0,p1,p2,p3) __arm_vmullbq_int_m(p0,p1,p2,p3) +#define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vmulltq_int_m(p0,p1,p2,p3) __arm_vmulltq_int_m(p0,p1,p2,p3) +#define __arm_vmulltq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vmulltq_poly_m(p0,p1,p2,p3) __arm_vmulltq_poly_m(p0,p1,p2,p3) +#define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));}) + +#define vqaddq_m(p0,p1,p2,p3) __arm_vqaddq_m(p0,p1,p2,p3) +#define __arm_vqaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + +#define vqdmlahq_m(p0,p1,p2,p3) __arm_vqdmlahq_m(p0,p1,p2,p3) +#define __arm_vqdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) + +#define vqdmulhq_m(p0,p1,p2,p3) __arm_vqdmulhq_m(p0,p1,p2,p3) +#define __arm_vqdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vqdmullbq_m(p0,p1,p2,p3) __arm_vqdmullbq_m(p0,p1,p2,p3) +#define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) + +#define vqdmulltq_m(p0,p1,p2,p3) __arm_vqdmulltq_m(p0,p1,p2,p3) +#define __arm_vqdmulltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vqrdmladhq_m(p0,p1,p2,p3) __arm_vqrdmladhq_m(p0,p1,p2,p3) +#define __arm_vqrdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vqrdmladhxq_m(p0,p1,p2,p3) __arm_vqrdmladhxq_m(p0,p1,p2,p3) +#define __arm_vqrdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vmlsdavaxq_p(p0,p1,p2,p3) __arm_vmlsdavaxq_p(p0,p1,p2,p3) +#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vmlsdavaq_p(p0,p1,p2,p3) __arm_vmlsdavaq_p(p0,p1,p2,p3) +#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vmladavaxq_p(p0,p1,p2,p3) __arm_vmladavaxq_p(p0,p1,p2,p3) +#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_p_s8 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \ + int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_p_s16 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ + int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) + +#define vmullbq_poly_m(p0,p1,p2,p3) __arm_vmullbq_poly_m(p0,p1,p2,p3) +#define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));}) + #ifdef __cplusplus } #endif diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 6048591..b448889 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -654,3 +654,34 @@ VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaxq_p_s, v4si) VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaq_p_s, v4si) VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaxq_p_s, v4si) VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaq_p_s, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_u, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbrsrq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vorrq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vornq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vminnmq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmaxnmq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmsq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmasq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, veorq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot180_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot180_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbicq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vandq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vabdq_m_f, v8hf, v4sf) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 3dd33d8..bf4eb5d 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -186,7 +186,12 @@ VQDMULLTQ_M_N_S VQDMULLTQ_M_S VQRSHRUNBQ_M_N_S VQRSHRUNTQ_M_N_SVQSHRUNBQ_M_N_S VQSHRUNTQ_M_N_S VRMLALDAVHAQ_P_U VRMLALDAVHAXQ_P_S VRMLSLDAVHAQ_P_S - VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S]) + VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S + VCMLAQ_M_F VCMLAQ_ROT180_M_F VCMLAQ_ROT270_M_F + VCMLAQ_ROT90_M_F VCMULQ_M_F VCMULQ_ROT180_M_F + VCMULQ_ROT270_M_F VCMULQ_ROT90_M_F VFMAQ_M_F + VFMAQ_M_N_F VFMASQ_M_N_F VFMSQ_M_F VMAXNMQ_M_F + VMINNMQ_M_F VSUBQ_M_F]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -7443,3 +7448,495 @@ "vpst\;vrmlsldavhaxt.s32\t%Q0, %R0, %q2, %q3" [(set_attr "type" "mve_move") (set_attr "length""8")]) +;; +;; [vabdq_m_f]) +;; +(define_insn "mve_vabdq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VABDQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vabdt.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vaddq_m_f]) +;; +(define_insn "mve_vaddq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VADDQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vaddt.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vaddq_m_n_f]) +;; +(define_insn "mve_vaddq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VADDQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vaddt.f%#<V_sz_elem> %q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vandq_m_f]) +;; +(define_insn "mve_vandq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VANDQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vandt %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vbicq_m_f]) +;; +(define_insn "mve_vbicq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VBICQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vbict %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vbrsrq_m_n_f]) +;; +(define_insn "mve_vbrsrq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:SI 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VBRSRQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vbrsrt.%#<V_sz_elem> %q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcaddq_rot270_m_f]) +;; +(define_insn "mve_vcaddq_rot270_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCADDQ_ROT270_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcaddt.f%#<V_sz_elem> %q0, %q2, %q3, #270" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcaddq_rot90_m_f]) +;; +(define_insn "mve_vcaddq_rot90_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCADDQ_ROT90_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcaddt.f%#<V_sz_elem> %q0, %q2, %q3, #90" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmlaq_m_f]) +;; +(define_insn "mve_vcmlaq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMLAQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #0" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmlaq_rot180_m_f]) +;; +(define_insn "mve_vcmlaq_rot180_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMLAQ_ROT180_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #180" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmlaq_rot270_m_f]) +;; +(define_insn "mve_vcmlaq_rot270_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMLAQ_ROT270_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #270" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmlaq_rot90_m_f]) +;; +(define_insn "mve_vcmlaq_rot90_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMLAQ_ROT90_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #90" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmulq_m_f]) +;; +(define_insn "mve_vcmulq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMULQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #0" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmulq_rot180_m_f]) +;; +(define_insn "mve_vcmulq_rot180_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMULQ_ROT180_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #180" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmulq_rot270_m_f]) +;; +(define_insn "mve_vcmulq_rot270_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMULQ_ROT270_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #270" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vcmulq_rot90_m_f]) +;; +(define_insn "mve_vcmulq_rot90_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VCMULQ_ROT90_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #90" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [veorq_m_f]) +;; +(define_insn "mve_veorq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VEORQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;veort %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vfmaq_m_f]) +;; +(define_insn "mve_vfmaq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VFMAQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vfmat.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vfmaq_m_n_f]) +;; +(define_insn "mve_vfmaq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VFMAQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vfmat.f%#<V_sz_elem> %q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vfmasq_m_n_f]) +;; +(define_insn "mve_vfmasq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VFMASQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vfmast.f%#<V_sz_elem> %q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vfmsq_m_f]) +;; +(define_insn "mve_vfmsq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VFMSQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vfmst.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vmaxnmq_m_f]) +;; +(define_insn "mve_vmaxnmq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VMAXNMQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vmaxnmt.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vminnmq_m_f]) +;; +(define_insn "mve_vminnmq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VMINNMQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vminnmt.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vmulq_m_f]) +;; +(define_insn "mve_vmulq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VMULQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vmult.f%#<V_sz_elem> %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vmulq_m_n_f]) +;; +(define_insn "mve_vmulq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VMULQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vmult.f%#<V_sz_elem> %q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vornq_m_f]) +;; +(define_insn "mve_vornq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VORNQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vornt %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vorrq_m_f]) +;; +(define_insn "mve_vorrq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VORRQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vorrt %q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vsubq_m_f]) +;; +(define_insn "mve_vsubq_m_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:MVE_0 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VSUBQ_M_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vsubt.f%#<V_sz_elem>\t%q0, %q2, %q3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; +;; [vsubq_m_n_f]) +;; +(define_insn "mve_vsubq_m_n_f<mode>" + [ + (set (match_operand:MVE_0 0 "s_register_operand" "=w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") + (match_operand:MVE_0 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VSUBQ_M_N_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vsubt.f%#<V_sz_elem>\t%q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 650a6b9..4ed6dc7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -2,6 +2,73 @@ Mihail Ionescu <mihail.ionescu@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com> + * gcc.target/arm/mve/intrinsics/vabdq_m_f16.c: New test. + * gcc.target/arm/mve/intrinsics/vabdq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vandq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vandq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vbicq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vbicq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/veorq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/veorq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vornq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vornq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vorrq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vorrq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c: Likewise. + +2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com> + Mihail Ionescu <mihail.ionescu@arm.com> + Srinath Parvathaneni <srinath.parvathaneni@arm.com> + * gcc.target/arm/mve/intrinsics/vmlaldavaq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmlaldavaq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmlaldavaq_p_u16.c: Likewise. diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c new file mode 100644 index 0000000..dac3dd7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vabdq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vabdt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vabdq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vabdt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c new file mode 100644 index 0000000..d1b59ea --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vabdq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vabdt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vabdq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vabdt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c new file mode 100644 index 0000000..b52b719 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vaddq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c new file mode 100644 index 0000000..c3e7c2d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vaddq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c new file mode 100644 index 0000000..94d6b6f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vaddq_m_n_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c new file mode 100644 index 0000000..ab100cb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vaddq_m_n_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vaddt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c new file mode 100644 index 0000000..c641fce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vandq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vandt" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vandq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vandt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c new file mode 100644 index 0000000..a3344d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vandq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vandt" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vandq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vandt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c new file mode 100644 index 0000000..f3237a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vbicq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbict" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vbicq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbict" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c new file mode 100644 index 0000000..975d9db --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vbicq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbict" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vbicq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbict" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c new file mode 100644 index 0000000..afb5ef8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, int32_t b, mve_pred16_t p) +{ + return vbrsrq_m_n_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbrsrt.16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, int32_t b, mve_pred16_t p) +{ + return vbrsrq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbrsrt.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c new file mode 100644 index 0000000..3b612cb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, int32_t b, mve_pred16_t p) +{ + return vbrsrq_m_n_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbrsrt.32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, int32_t b, mve_pred16_t p) +{ + return vbrsrq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vbrsrt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c new file mode 100644 index 0000000..52af15c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcaddq_rot270_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcaddq_rot270_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c new file mode 100644 index 0000000..b5fb2e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcaddq_rot270_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcaddq_rot270_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c new file mode 100644 index 0000000..8db12c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcaddq_rot90_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcaddq_rot90_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c new file mode 100644 index 0000000..fae494c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcaddq_rot90_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcaddq_rot90_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcaddt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c new file mode 100644 index 0000000..3f86c8c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c new file mode 100644 index 0000000..3cc3342 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c new file mode 100644 index 0000000..197d8ce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot180_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot180_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c new file mode 100644 index 0000000..3bd195f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot180_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot180_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c new file mode 100644 index 0000000..29e8638 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot270_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot270_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c new file mode 100644 index 0000000..a4ca602 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot270_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot270_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c new file mode 100644 index 0000000..55afaf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot90_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vcmlaq_rot90_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c new file mode 100644 index 0000000..89be04b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot90_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vcmlaq_rot90_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmlat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c new file mode 100644 index 0000000..c83be6e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c new file mode 100644 index 0000000..acea3b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c new file mode 100644 index 0000000..78e7606 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot180_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot180_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c new file mode 100644 index 0000000..8643de0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot180_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot180_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c new file mode 100644 index 0000000..632e21c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot270_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot270_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c new file mode 100644 index 0000000..e5f35de --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot270_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot270_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c new file mode 100644 index 0000000..dfd5d0f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot90_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vcmulq_rot90_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c new file mode 100644 index 0000000..7b87791 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot90_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vcmulq_rot90_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c new file mode 100644 index 0000000..62c3086 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int16x8_t +foo (int16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ + return vcvtq_m_n_s16_f16 (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.s16.f16" } } */ + +int16x8_t +foo1 (int16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ + return vcvtq_m_n (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.s16.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c new file mode 100644 index 0000000..0eeba94 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int32x4_t +foo (int32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ + return vcvtq_m_n_s32_f32 (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.s32.f32" } } */ + +int32x4_t +foo1 (int32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ + return vcvtq_m_n (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.s32.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c new file mode 100644 index 0000000..fbc3b9c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint16x8_t +foo (uint16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ + return vcvtq_m_n_u16_f16 (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.u16.f16" } } */ + +uint16x8_t +foo1 (uint16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ + return vcvtq_m_n (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.u16.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c new file mode 100644 index 0000000..b7719de --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ + return vcvtq_m_n_u32_f32 (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.u32.f32" } } */ + +uint32x4_t +foo1 (uint32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ + return vcvtq_m_n (inactive, a, 1, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vcvtt.u32.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c new file mode 100644 index 0000000..8a70933 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return veorq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "veort" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return veorq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "veort" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c new file mode 100644 index 0000000..37ab556 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return veorq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "veort" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return veorq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "veort" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c new file mode 100644 index 0000000..f27d664 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vfmaq_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c new file mode 100644 index 0000000..7e7d38f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vfmaq_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c new file mode 100644 index 0000000..93c8aa3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmaq_m_n_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c new file mode 100644 index 0000000..1f9189a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmaq_m_n_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmat.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c new file mode 100644 index 0000000..4f91179 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmasq_m_n_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmast.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmasq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmast.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c new file mode 100644 index 0000000..e630f44 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmasq_m_n_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmast.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmasq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmast.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c new file mode 100644 index 0000000..2cda2e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vfmsq_m_f16 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmst.f16" } } */ + +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) +{ + return vfmsq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmst.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c new file mode 100644 index 0000000..773edf0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vfmsq_m_f32 (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmst.f32" } } */ + +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) +{ + return vfmsq_m (a, b, c, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vfmst.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c new file mode 100644 index 0000000..43858dc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmaxnmt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmaxnmt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c new file mode 100644 index 0000000..a3b7e8f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmaxnmt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmaxnmt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c new file mode 100644 index 0000000..d01e266 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vminnmt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vminnmt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c new file mode 100644 index 0000000..5193b2f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vminnmt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vminnmt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c new file mode 100644 index 0000000..43b751b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vmulq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c new file mode 100644 index 0000000..6dee764 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vmulq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c new file mode 100644 index 0000000..a0b2d53 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vmulq_m_n_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c new file mode 100644 index 0000000..c457195 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vmulq_m_n_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vmult.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c new file mode 100644 index 0000000..9833268 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vornq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vornt" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vornq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vornt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c new file mode 100644 index 0000000..48a720a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vornq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vornt" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vornq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vornt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c new file mode 100644 index 0000000..6e8591e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vorrq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vorrt" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vorrq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vorrt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c new file mode 100644 index 0000000..09ee673 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vorrq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vorrt" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vorrq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vorrt" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c new file mode 100644 index 0000000..383491d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vsubq_m_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c new file mode 100644 index 0000000..327b524 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vsubq_m_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c new file mode 100644 index 0000000..5657c36 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float16x8_t +foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vsubq_m_n_f16 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f16" } } */ + +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c new file mode 100644 index 0000000..c9502cd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +float32x4_t +foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vsubq_m_n_f32 (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f32" } } */ + +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, b, p); +} + +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vsubt.f32" } } */ |