aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSrinath Parvathaneni <srinath.parvathaneni@arm.com>2020-03-18 17:16:21 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2020-03-18 17:16:21 +0000
commit532e9e2402a62367b965b0901478d27570b6d3a2 (patch)
tree48058a1945321182060862c21f578f25cee7cbc3
parentf2170a379b0fcd79191b5363cddaf0cbc508fd2b (diff)
downloadgcc-532e9e2402a62367b965b0901478d27570b6d3a2.zip
gcc-532e9e2402a62367b965b0901478d27570b6d3a2.tar.gz
gcc-532e9e2402a62367b965b0901478d27570b6d3a2.tar.bz2
[ARM][GCC][4/4x]: MVE intrinsics with quaternary operands.
This patch supports following MVE ACLE intrinsics with quaternary operands. vabdq_m_f32, vabdq_m_f16, vaddq_m_f32, vaddq_m_f16, vaddq_m_n_f32, vaddq_m_n_f16, vandq_m_f32, vandq_m_f16, vbicq_m_f32, vbicq_m_f16, vbrsrq_m_n_f32, vbrsrq_m_n_f16, vcaddq_rot270_m_f32, vcaddq_rot270_m_f16, vcaddq_rot90_m_f32, vcaddq_rot90_m_f16, vcmlaq_m_f32, vcmlaq_m_f16, vcmlaq_rot180_m_f32, vcmlaq_rot180_m_f16, vcmlaq_rot270_m_f32, vcmlaq_rot270_m_f16, vcmlaq_rot90_m_f32, vcmlaq_rot90_m_f16, vcmulq_m_f32, vcmulq_m_f16, vcmulq_rot180_m_f32, vcmulq_rot180_m_f16, vcmulq_rot270_m_f32, vcmulq_rot270_m_f16, vcmulq_rot90_m_f32, vcmulq_rot90_m_f16, vcvtq_m_n_s32_f32, vcvtq_m_n_s16_f16, vcvtq_m_n_u32_f32, vcvtq_m_n_u16_f16, veorq_m_f32, veorq_m_f16, vfmaq_m_f32, vfmaq_m_f16, vfmaq_m_n_f32, vfmaq_m_n_f16, vfmasq_m_n_f32, vfmasq_m_n_f16, vfmsq_m_f32, vfmsq_m_f16, vmaxnmq_m_f32, vmaxnmq_m_f16, vminnmq_m_f32, vminnmq_m_f16, vmulq_m_f32, vmulq_m_f16, vmulq_m_n_f32, vmulq_m_n_f16, vornq_m_f32, vornq_m_f16, vorrq_m_f32, vorrq_m_f16, vsubq_m_f32, vsubq_m_f16, vsubq_m_n_f32, vsubq_m_n_f16. Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics 2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com> Mihail Ionescu <mihail.ionescu@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com> * config/arm/arm_mve.h (vabdq_m_f32): Define macro. (vabdq_m_f16): Likewise. (vaddq_m_f32): Likewise. (vaddq_m_f16): Likewise. (vaddq_m_n_f32): Likewise. (vaddq_m_n_f16): Likewise. (vandq_m_f32): Likewise. (vandq_m_f16): Likewise. (vbicq_m_f32): Likewise. (vbicq_m_f16): Likewise. (vbrsrq_m_n_f32): Likewise. (vbrsrq_m_n_f16): Likewise. (vcaddq_rot270_m_f32): Likewise. (vcaddq_rot270_m_f16): Likewise. (vcaddq_rot90_m_f32): Likewise. (vcaddq_rot90_m_f16): Likewise. (vcmlaq_m_f32): Likewise. (vcmlaq_m_f16): Likewise. (vcmlaq_rot180_m_f32): Likewise. (vcmlaq_rot180_m_f16): Likewise. (vcmlaq_rot270_m_f32): Likewise. (vcmlaq_rot270_m_f16): Likewise. (vcmlaq_rot90_m_f32): Likewise. (vcmlaq_rot90_m_f16): Likewise. (vcmulq_m_f32): Likewise. (vcmulq_m_f16): Likewise. (vcmulq_rot180_m_f32): Likewise. (vcmulq_rot180_m_f16): Likewise. (vcmulq_rot270_m_f32): Likewise. (vcmulq_rot270_m_f16): Likewise. (vcmulq_rot90_m_f32): Likewise. (vcmulq_rot90_m_f16): Likewise. (vcvtq_m_n_s32_f32): Likewise. (vcvtq_m_n_s16_f16): Likewise. (vcvtq_m_n_u32_f32): Likewise. (vcvtq_m_n_u16_f16): Likewise. (veorq_m_f32): Likewise. (veorq_m_f16): Likewise. (vfmaq_m_f32): Likewise. (vfmaq_m_f16): Likewise. (vfmaq_m_n_f32): Likewise. (vfmaq_m_n_f16): Likewise. (vfmasq_m_n_f32): Likewise. (vfmasq_m_n_f16): Likewise. (vfmsq_m_f32): Likewise. (vfmsq_m_f16): Likewise. (vmaxnmq_m_f32): Likewise. (vmaxnmq_m_f16): Likewise. (vminnmq_m_f32): Likewise. (vminnmq_m_f16): Likewise. (vmulq_m_f32): Likewise. (vmulq_m_f16): Likewise. (vmulq_m_n_f32): Likewise. (vmulq_m_n_f16): Likewise. (vornq_m_f32): Likewise. (vornq_m_f16): Likewise. (vorrq_m_f32): Likewise. (vorrq_m_f16): Likewise. (vsubq_m_f32): Likewise. (vsubq_m_f16): Likewise. (vsubq_m_n_f32): Likewise. (vsubq_m_n_f16): Likewise. (__attribute__): Likewise. (__arm_vabdq_m_f32): Likewise. (__arm_vabdq_m_f16): Likewise. (__arm_vaddq_m_f32): Likewise. (__arm_vaddq_m_f16): Likewise. (__arm_vaddq_m_n_f32): Likewise. (__arm_vaddq_m_n_f16): Likewise. (__arm_vandq_m_f32): Likewise. (__arm_vandq_m_f16): Likewise. (__arm_vbicq_m_f32): Likewise. (__arm_vbicq_m_f16): Likewise. (__arm_vbrsrq_m_n_f32): Likewise. (__arm_vbrsrq_m_n_f16): Likewise. (__arm_vcaddq_rot270_m_f32): Likewise. (__arm_vcaddq_rot270_m_f16): Likewise. (__arm_vcaddq_rot90_m_f32): Likewise. (__arm_vcaddq_rot90_m_f16): Likewise. (__arm_vcmlaq_m_f32): Likewise. (__arm_vcmlaq_m_f16): Likewise. (__arm_vcmlaq_rot180_m_f32): Likewise. (__arm_vcmlaq_rot180_m_f16): Likewise. (__arm_vcmlaq_rot270_m_f32): Likewise. (__arm_vcmlaq_rot270_m_f16): Likewise. (__arm_vcmlaq_rot90_m_f32): Likewise. (__arm_vcmlaq_rot90_m_f16): Likewise. (__arm_vcmulq_m_f32): Likewise. (__arm_vcmulq_m_f16): Likewise. (__arm_vcmulq_rot180_m_f32): Define intrinsic. (__arm_vcmulq_rot180_m_f16): Likewise. (__arm_vcmulq_rot270_m_f32): Likewise. (__arm_vcmulq_rot270_m_f16): Likewise. (__arm_vcmulq_rot90_m_f32): Likewise. (__arm_vcmulq_rot90_m_f16): Likewise. (__arm_vcvtq_m_n_s32_f32): Likewise. (__arm_vcvtq_m_n_s16_f16): Likewise. (__arm_vcvtq_m_n_u32_f32): Likewise. (__arm_vcvtq_m_n_u16_f16): Likewise. (__arm_veorq_m_f32): Likewise. (__arm_veorq_m_f16): Likewise. (__arm_vfmaq_m_f32): Likewise. (__arm_vfmaq_m_f16): Likewise. (__arm_vfmaq_m_n_f32): Likewise. (__arm_vfmaq_m_n_f16): Likewise. (__arm_vfmasq_m_n_f32): Likewise. (__arm_vfmasq_m_n_f16): Likewise. (__arm_vfmsq_m_f32): Likewise. (__arm_vfmsq_m_f16): Likewise. (__arm_vmaxnmq_m_f32): Likewise. (__arm_vmaxnmq_m_f16): Likewise. (__arm_vminnmq_m_f32): Likewise. (__arm_vminnmq_m_f16): Likewise. (__arm_vmulq_m_f32): Likewise. (__arm_vmulq_m_f16): Likewise. (__arm_vmulq_m_n_f32): Likewise. (__arm_vmulq_m_n_f16): Likewise. (__arm_vornq_m_f32): Likewise. (__arm_vornq_m_f16): Likewise. (__arm_vorrq_m_f32): Likewise. (__arm_vorrq_m_f16): Likewise. (__arm_vsubq_m_f32): Likewise. (__arm_vsubq_m_f16): Likewise. (__arm_vsubq_m_n_f32): Likewise. (__arm_vsubq_m_n_f16): Likewise. (vabdq_m): Define polymorphic variant. (vaddq_m): Likewise. (vaddq_m_n): Likewise. (vandq_m): Likewise. (vbicq_m): Likewise. (vbrsrq_m_n): Likewise. (vcaddq_rot270_m): Likewise. (vcaddq_rot90_m): Likewise. (vcmlaq_m): Likewise. (vcmlaq_rot180_m): Likewise. (vcmlaq_rot270_m): Likewise. (vcmlaq_rot90_m): Likewise. (vcmulq_m): Likewise. (vcmulq_rot180_m): Likewise. (vcmulq_rot270_m): Likewise. (vcmulq_rot90_m): Likewise. (veorq_m): Likewise. (vfmaq_m): Likewise. (vfmaq_m_n): Likewise. (vfmasq_m_n): Likewise. (vfmsq_m): Likewise. (vmaxnmq_m): Likewise. (vminnmq_m): Likewise. (vmulq_m): Likewise. (vmulq_m_n): Likewise. (vornq_m): Likewise. (vsubq_m): Likewise. (vsubq_m_n): Likewise. (vorrq_m): Likewise. * config/arm/arm_mve_builtins.def (QUADOP_NONE_NONE_NONE_IMM_UNONE): Use builtin qualifier. (QUADOP_NONE_NONE_NONE_NONE_UNONE): Likewise. (QUADOP_UNONE_UNONE_NONE_IMM_UNONE): Likewise. * config/arm/mve.md (mve_vabdq_m_f<mode>): Define RTL pattern. (mve_vaddq_m_f<mode>): Likewise. (mve_vaddq_m_n_f<mode>): Likewise. (mve_vandq_m_f<mode>): Likewise. (mve_vbicq_m_f<mode>): Likewise. (mve_vbrsrq_m_n_f<mode>): Likewise. (mve_vcaddq_rot270_m_f<mode>): Likewise. (mve_vcaddq_rot90_m_f<mode>): Likewise. (mve_vcmlaq_m_f<mode>): Likewise. (mve_vcmlaq_rot180_m_f<mode>): Likewise. (mve_vcmlaq_rot270_m_f<mode>): Likewise. (mve_vcmlaq_rot90_m_f<mode>): Likewise. (mve_vcmulq_m_f<mode>): Likewise. (mve_vcmulq_rot180_m_f<mode>): Likewise. (mve_vcmulq_rot270_m_f<mode>): Likewise. (mve_vcmulq_rot90_m_f<mode>): Likewise. (mve_veorq_m_f<mode>): Likewise. (mve_vfmaq_m_f<mode>): Likewise. (mve_vfmaq_m_n_f<mode>): Likewise. (mve_vfmasq_m_n_f<mode>): Likewise. (mve_vfmsq_m_f<mode>): Likewise. (mve_vmaxnmq_m_f<mode>): Likewise. (mve_vminnmq_m_f<mode>): Likewise. (mve_vmulq_m_f<mode>): Likewise. (mve_vmulq_m_n_f<mode>): Likewise. (mve_vornq_m_f<mode>): Likewise. (mve_vorrq_m_f<mode>): Likewise. (mve_vsubq_m_f<mode>): Likewise. (mve_vsubq_m_n_f<mode>): Likewise. gcc/testsuite/ChangeLog: 2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com> Mihail Ionescu <mihail.ionescu@arm.com> Srinath Parvathaneni <srinath.parvathaneni@arm.com> * gcc.target/arm/mve/intrinsics/vabdq_m_f16.c: New test. * gcc.target/arm/mve/intrinsics/vabdq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vandq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vandq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vbicq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vbicq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/veorq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/veorq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vornq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vornq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vorrq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vorrq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c: Likewise.
-rw-r--r--gcc/ChangeLog192
-rw-r--r--gcc/config/arm/arm_mve.h1380
-rw-r--r--gcc/config/arm/arm_mve_builtins.def31
-rw-r--r--gcc/config/arm/mve.md499
-rw-r--r--gcc/testsuite/ChangeLog67
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c24
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c24
67 files changed, 3364 insertions, 293 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8bb2b7e..3b1154b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,198 @@
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+ * config/arm/arm_mve.h (vabdq_m_f32): Define macro.
+ (vabdq_m_f16): Likewise.
+ (vaddq_m_f32): Likewise.
+ (vaddq_m_f16): Likewise.
+ (vaddq_m_n_f32): Likewise.
+ (vaddq_m_n_f16): Likewise.
+ (vandq_m_f32): Likewise.
+ (vandq_m_f16): Likewise.
+ (vbicq_m_f32): Likewise.
+ (vbicq_m_f16): Likewise.
+ (vbrsrq_m_n_f32): Likewise.
+ (vbrsrq_m_n_f16): Likewise.
+ (vcaddq_rot270_m_f32): Likewise.
+ (vcaddq_rot270_m_f16): Likewise.
+ (vcaddq_rot90_m_f32): Likewise.
+ (vcaddq_rot90_m_f16): Likewise.
+ (vcmlaq_m_f32): Likewise.
+ (vcmlaq_m_f16): Likewise.
+ (vcmlaq_rot180_m_f32): Likewise.
+ (vcmlaq_rot180_m_f16): Likewise.
+ (vcmlaq_rot270_m_f32): Likewise.
+ (vcmlaq_rot270_m_f16): Likewise.
+ (vcmlaq_rot90_m_f32): Likewise.
+ (vcmlaq_rot90_m_f16): Likewise.
+ (vcmulq_m_f32): Likewise.
+ (vcmulq_m_f16): Likewise.
+ (vcmulq_rot180_m_f32): Likewise.
+ (vcmulq_rot180_m_f16): Likewise.
+ (vcmulq_rot270_m_f32): Likewise.
+ (vcmulq_rot270_m_f16): Likewise.
+ (vcmulq_rot90_m_f32): Likewise.
+ (vcmulq_rot90_m_f16): Likewise.
+ (vcvtq_m_n_s32_f32): Likewise.
+ (vcvtq_m_n_s16_f16): Likewise.
+ (vcvtq_m_n_u32_f32): Likewise.
+ (vcvtq_m_n_u16_f16): Likewise.
+ (veorq_m_f32): Likewise.
+ (veorq_m_f16): Likewise.
+ (vfmaq_m_f32): Likewise.
+ (vfmaq_m_f16): Likewise.
+ (vfmaq_m_n_f32): Likewise.
+ (vfmaq_m_n_f16): Likewise.
+ (vfmasq_m_n_f32): Likewise.
+ (vfmasq_m_n_f16): Likewise.
+ (vfmsq_m_f32): Likewise.
+ (vfmsq_m_f16): Likewise.
+ (vmaxnmq_m_f32): Likewise.
+ (vmaxnmq_m_f16): Likewise.
+ (vminnmq_m_f32): Likewise.
+ (vminnmq_m_f16): Likewise.
+ (vmulq_m_f32): Likewise.
+ (vmulq_m_f16): Likewise.
+ (vmulq_m_n_f32): Likewise.
+ (vmulq_m_n_f16): Likewise.
+ (vornq_m_f32): Likewise.
+ (vornq_m_f16): Likewise.
+ (vorrq_m_f32): Likewise.
+ (vorrq_m_f16): Likewise.
+ (vsubq_m_f32): Likewise.
+ (vsubq_m_f16): Likewise.
+ (vsubq_m_n_f32): Likewise.
+ (vsubq_m_n_f16): Likewise.
+ (__attribute__): Likewise.
+ (__arm_vabdq_m_f32): Likewise.
+ (__arm_vabdq_m_f16): Likewise.
+ (__arm_vaddq_m_f32): Likewise.
+ (__arm_vaddq_m_f16): Likewise.
+ (__arm_vaddq_m_n_f32): Likewise.
+ (__arm_vaddq_m_n_f16): Likewise.
+ (__arm_vandq_m_f32): Likewise.
+ (__arm_vandq_m_f16): Likewise.
+ (__arm_vbicq_m_f32): Likewise.
+ (__arm_vbicq_m_f16): Likewise.
+ (__arm_vbrsrq_m_n_f32): Likewise.
+ (__arm_vbrsrq_m_n_f16): Likewise.
+ (__arm_vcaddq_rot270_m_f32): Likewise.
+ (__arm_vcaddq_rot270_m_f16): Likewise.
+ (__arm_vcaddq_rot90_m_f32): Likewise.
+ (__arm_vcaddq_rot90_m_f16): Likewise.
+ (__arm_vcmlaq_m_f32): Likewise.
+ (__arm_vcmlaq_m_f16): Likewise.
+ (__arm_vcmlaq_rot180_m_f32): Likewise.
+ (__arm_vcmlaq_rot180_m_f16): Likewise.
+ (__arm_vcmlaq_rot270_m_f32): Likewise.
+ (__arm_vcmlaq_rot270_m_f16): Likewise.
+ (__arm_vcmlaq_rot90_m_f32): Likewise.
+ (__arm_vcmlaq_rot90_m_f16): Likewise.
+ (__arm_vcmulq_m_f32): Likewise.
+ (__arm_vcmulq_m_f16): Likewise.
+ (__arm_vcmulq_rot180_m_f32): Define intrinsic.
+ (__arm_vcmulq_rot180_m_f16): Likewise.
+ (__arm_vcmulq_rot270_m_f32): Likewise.
+ (__arm_vcmulq_rot270_m_f16): Likewise.
+ (__arm_vcmulq_rot90_m_f32): Likewise.
+ (__arm_vcmulq_rot90_m_f16): Likewise.
+ (__arm_vcvtq_m_n_s32_f32): Likewise.
+ (__arm_vcvtq_m_n_s16_f16): Likewise.
+ (__arm_vcvtq_m_n_u32_f32): Likewise.
+ (__arm_vcvtq_m_n_u16_f16): Likewise.
+ (__arm_veorq_m_f32): Likewise.
+ (__arm_veorq_m_f16): Likewise.
+ (__arm_vfmaq_m_f32): Likewise.
+ (__arm_vfmaq_m_f16): Likewise.
+ (__arm_vfmaq_m_n_f32): Likewise.
+ (__arm_vfmaq_m_n_f16): Likewise.
+ (__arm_vfmasq_m_n_f32): Likewise.
+ (__arm_vfmasq_m_n_f16): Likewise.
+ (__arm_vfmsq_m_f32): Likewise.
+ (__arm_vfmsq_m_f16): Likewise.
+ (__arm_vmaxnmq_m_f32): Likewise.
+ (__arm_vmaxnmq_m_f16): Likewise.
+ (__arm_vminnmq_m_f32): Likewise.
+ (__arm_vminnmq_m_f16): Likewise.
+ (__arm_vmulq_m_f32): Likewise.
+ (__arm_vmulq_m_f16): Likewise.
+ (__arm_vmulq_m_n_f32): Likewise.
+ (__arm_vmulq_m_n_f16): Likewise.
+ (__arm_vornq_m_f32): Likewise.
+ (__arm_vornq_m_f16): Likewise.
+ (__arm_vorrq_m_f32): Likewise.
+ (__arm_vorrq_m_f16): Likewise.
+ (__arm_vsubq_m_f32): Likewise.
+ (__arm_vsubq_m_f16): Likewise.
+ (__arm_vsubq_m_n_f32): Likewise.
+ (__arm_vsubq_m_n_f16): Likewise.
+ (vabdq_m): Define polymorphic variant.
+ (vaddq_m): Likewise.
+ (vaddq_m_n): Likewise.
+ (vandq_m): Likewise.
+ (vbicq_m): Likewise.
+ (vbrsrq_m_n): Likewise.
+ (vcaddq_rot270_m): Likewise.
+ (vcaddq_rot90_m): Likewise.
+ (vcmlaq_m): Likewise.
+ (vcmlaq_rot180_m): Likewise.
+ (vcmlaq_rot270_m): Likewise.
+ (vcmlaq_rot90_m): Likewise.
+ (vcmulq_m): Likewise.
+ (vcmulq_rot180_m): Likewise.
+ (vcmulq_rot270_m): Likewise.
+ (vcmulq_rot90_m): Likewise.
+ (veorq_m): Likewise.
+ (vfmaq_m): Likewise.
+ (vfmaq_m_n): Likewise.
+ (vfmasq_m_n): Likewise.
+ (vfmsq_m): Likewise.
+ (vmaxnmq_m): Likewise.
+ (vminnmq_m): Likewise.
+ (vmulq_m): Likewise.
+ (vmulq_m_n): Likewise.
+ (vornq_m): Likewise.
+ (vsubq_m): Likewise.
+ (vsubq_m_n): Likewise.
+ (vorrq_m): Likewise.
+ * config/arm/arm_mve_builtins.def (QUADOP_NONE_NONE_NONE_IMM_UNONE): Use
+ builtin qualifier.
+ (QUADOP_NONE_NONE_NONE_NONE_UNONE): Likewise.
+ (QUADOP_UNONE_UNONE_NONE_IMM_UNONE): Likewise.
+ * config/arm/mve.md (mve_vabdq_m_f<mode>): Define RTL pattern.
+ (mve_vaddq_m_f<mode>): Likewise.
+ (mve_vaddq_m_n_f<mode>): Likewise.
+ (mve_vandq_m_f<mode>): Likewise.
+ (mve_vbicq_m_f<mode>): Likewise.
+ (mve_vbrsrq_m_n_f<mode>): Likewise.
+ (mve_vcaddq_rot270_m_f<mode>): Likewise.
+ (mve_vcaddq_rot90_m_f<mode>): Likewise.
+ (mve_vcmlaq_m_f<mode>): Likewise.
+ (mve_vcmlaq_rot180_m_f<mode>): Likewise.
+ (mve_vcmlaq_rot270_m_f<mode>): Likewise.
+ (mve_vcmlaq_rot90_m_f<mode>): Likewise.
+ (mve_vcmulq_m_f<mode>): Likewise.
+ (mve_vcmulq_rot180_m_f<mode>): Likewise.
+ (mve_vcmulq_rot270_m_f<mode>): Likewise.
+ (mve_vcmulq_rot90_m_f<mode>): Likewise.
+ (mve_veorq_m_f<mode>): Likewise.
+ (mve_vfmaq_m_f<mode>): Likewise.
+ (mve_vfmaq_m_n_f<mode>): Likewise.
+ (mve_vfmasq_m_n_f<mode>): Likewise.
+ (mve_vfmsq_m_f<mode>): Likewise.
+ (mve_vmaxnmq_m_f<mode>): Likewise.
+ (mve_vminnmq_m_f<mode>): Likewise.
+ (mve_vmulq_m_f<mode>): Likewise.
+ (mve_vmulq_m_n_f<mode>): Likewise.
+ (mve_vornq_m_f<mode>): Likewise.
+ (mve_vorrq_m_f<mode>): Likewise.
+ (mve_vsubq_m_f<mode>): Likewise.
+ (mve_vsubq_m_n_f<mode>): Likewise.
+
+2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com>
+ Mihail Ionescu <mihail.ionescu@arm.com>
+ Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+
* config/arm/arm-protos.h (arm_mve_immediate_check):
* config/arm/arm.c (arm_mve_immediate_check): Define fuction to check
mode and interger value.
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 0662812..4f8135d 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -1640,6 +1640,68 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
#define vshrntq_m_n_s16(__a, __b, __imm, __p) __arm_vshrntq_m_n_s16(__a, __b, __imm, __p)
#define vshrntq_m_n_u32(__a, __b, __imm, __p) __arm_vshrntq_m_n_u32(__a, __b, __imm, __p)
#define vshrntq_m_n_u16(__a, __b, __imm, __p) __arm_vshrntq_m_n_u16(__a, __b, __imm, __p)
+#define vabdq_m_f32(__inactive, __a, __b, __p) __arm_vabdq_m_f32(__inactive, __a, __b, __p)
+#define vabdq_m_f16(__inactive, __a, __b, __p) __arm_vabdq_m_f16(__inactive, __a, __b, __p)
+#define vaddq_m_f32(__inactive, __a, __b, __p) __arm_vaddq_m_f32(__inactive, __a, __b, __p)
+#define vaddq_m_f16(__inactive, __a, __b, __p) __arm_vaddq_m_f16(__inactive, __a, __b, __p)
+#define vaddq_m_n_f32(__inactive, __a, __b, __p) __arm_vaddq_m_n_f32(__inactive, __a, __b, __p)
+#define vaddq_m_n_f16(__inactive, __a, __b, __p) __arm_vaddq_m_n_f16(__inactive, __a, __b, __p)
+#define vandq_m_f32(__inactive, __a, __b, __p) __arm_vandq_m_f32(__inactive, __a, __b, __p)
+#define vandq_m_f16(__inactive, __a, __b, __p) __arm_vandq_m_f16(__inactive, __a, __b, __p)
+#define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
+#define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
+#define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
+#define vbrsrq_m_n_f16(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f16(__inactive, __a, __b, __p)
+#define vcaddq_rot270_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f32(__inactive, __a, __b, __p)
+#define vcaddq_rot270_m_f16(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f16(__inactive, __a, __b, __p)
+#define vcaddq_rot90_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m_f32(__inactive, __a, __b, __p)
+#define vcaddq_rot90_m_f16(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m_f16(__inactive, __a, __b, __p)
+#define vcmlaq_m_f32(__a, __b, __c, __p) __arm_vcmlaq_m_f32(__a, __b, __c, __p)
+#define vcmlaq_m_f16(__a, __b, __c, __p) __arm_vcmlaq_m_f16(__a, __b, __c, __p)
+#define vcmlaq_rot180_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f32(__a, __b, __c, __p)
+#define vcmlaq_rot180_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f16(__a, __b, __c, __p)
+#define vcmlaq_rot270_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f32(__a, __b, __c, __p)
+#define vcmlaq_rot270_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f16(__a, __b, __c, __p)
+#define vcmlaq_rot90_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f32(__a, __b, __c, __p)
+#define vcmlaq_rot90_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f16(__a, __b, __c, __p)
+#define vcmulq_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_m_f32(__inactive, __a, __b, __p)
+#define vcmulq_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_m_f16(__inactive, __a, __b, __p)
+#define vcmulq_rot180_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot180_m_f32(__inactive, __a, __b, __p)
+#define vcmulq_rot180_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot180_m_f16(__inactive, __a, __b, __p)
+#define vcmulq_rot270_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot270_m_f32(__inactive, __a, __b, __p)
+#define vcmulq_rot270_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot270_m_f16(__inactive, __a, __b, __p)
+#define vcmulq_rot90_m_f32(__inactive, __a, __b, __p) __arm_vcmulq_rot90_m_f32(__inactive, __a, __b, __p)
+#define vcmulq_rot90_m_f16(__inactive, __a, __b, __p) __arm_vcmulq_rot90_m_f16(__inactive, __a, __b, __p)
+#define vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p)
+#define vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p)
+#define vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p)
+#define vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p)
+#define veorq_m_f32(__inactive, __a, __b, __p) __arm_veorq_m_f32(__inactive, __a, __b, __p)
+#define veorq_m_f16(__inactive, __a, __b, __p) __arm_veorq_m_f16(__inactive, __a, __b, __p)
+#define vfmaq_m_f32(__a, __b, __c, __p) __arm_vfmaq_m_f32(__a, __b, __c, __p)
+#define vfmaq_m_f16(__a, __b, __c, __p) __arm_vfmaq_m_f16(__a, __b, __c, __p)
+#define vfmaq_m_n_f32(__a, __b, __c, __p) __arm_vfmaq_m_n_f32(__a, __b, __c, __p)
+#define vfmaq_m_n_f16(__a, __b, __c, __p) __arm_vfmaq_m_n_f16(__a, __b, __c, __p)
+#define vfmasq_m_n_f32(__a, __b, __c, __p) __arm_vfmasq_m_n_f32(__a, __b, __c, __p)
+#define vfmasq_m_n_f16(__a, __b, __c, __p) __arm_vfmasq_m_n_f16(__a, __b, __c, __p)
+#define vfmsq_m_f32(__a, __b, __c, __p) __arm_vfmsq_m_f32(__a, __b, __c, __p)
+#define vfmsq_m_f16(__a, __b, __c, __p) __arm_vfmsq_m_f16(__a, __b, __c, __p)
+#define vmaxnmq_m_f32(__inactive, __a, __b, __p) __arm_vmaxnmq_m_f32(__inactive, __a, __b, __p)
+#define vmaxnmq_m_f16(__inactive, __a, __b, __p) __arm_vmaxnmq_m_f16(__inactive, __a, __b, __p)
+#define vminnmq_m_f32(__inactive, __a, __b, __p) __arm_vminnmq_m_f32(__inactive, __a, __b, __p)
+#define vminnmq_m_f16(__inactive, __a, __b, __p) __arm_vminnmq_m_f16(__inactive, __a, __b, __p)
+#define vmulq_m_f32(__inactive, __a, __b, __p) __arm_vmulq_m_f32(__inactive, __a, __b, __p)
+#define vmulq_m_f16(__inactive, __a, __b, __p) __arm_vmulq_m_f16(__inactive, __a, __b, __p)
+#define vmulq_m_n_f32(__inactive, __a, __b, __p) __arm_vmulq_m_n_f32(__inactive, __a, __b, __p)
+#define vmulq_m_n_f16(__inactive, __a, __b, __p) __arm_vmulq_m_n_f16(__inactive, __a, __b, __p)
+#define vornq_m_f32(__inactive, __a, __b, __p) __arm_vornq_m_f32(__inactive, __a, __b, __p)
+#define vornq_m_f16(__inactive, __a, __b, __p) __arm_vornq_m_f16(__inactive, __a, __b, __p)
+#define vorrq_m_f32(__inactive, __a, __b, __p) __arm_vorrq_m_f32(__inactive, __a, __b, __p)
+#define vorrq_m_f16(__inactive, __a, __b, __p) __arm_vorrq_m_f16(__inactive, __a, __b, __p)
+#define vsubq_m_f32(__inactive, __a, __b, __p) __arm_vsubq_m_f32(__inactive, __a, __b, __p)
+#define vsubq_m_f16(__inactive, __a, __b, __p) __arm_vsubq_m_f16(__inactive, __a, __b, __p)
+#define vsubq_m_n_f32(__inactive, __a, __b, __p) __arm_vsubq_m_n_f32(__inactive, __a, __b, __p)
+#define vsubq_m_n_f16(__inactive, __a, __b, __p) __arm_vsubq_m_n_f16(__inactive, __a, __b, __p)
#endif
__extension__ extern __inline void
@@ -12641,6 +12703,439 @@ __arm_vcvtq_m_n_f32_s32 (float32x4_t __inactive, int32x4_t __a, const int __imm6
return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__inactive, __a, __imm6, __p);
}
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vabdq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vabdq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vabdq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vabdq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vaddq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vaddq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vaddq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vaddq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vaddq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vaddq_m_n_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vaddq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vaddq_m_n_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vandq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vandq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vandq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vandq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vbicq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vbicq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vbrsrq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, int32_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vbrsrq_m_n_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vbrsrq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, int32_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vbrsrq_m_n_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcaddq_rot270_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcaddq_rot270_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcaddq_rot270_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcaddq_rot270_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcaddq_rot90_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcaddq_rot90_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcaddq_rot90_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcaddq_rot90_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot180_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot180_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot180_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot180_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot270_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot270_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot270_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot270_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot90_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot90_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmlaq_rot90_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmlaq_rot90_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot180_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot180_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot180_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot180_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot270_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot270_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot270_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot270_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot90_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot90_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcmulq_rot90_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vcmulq_rot90_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_m_n_s32_f32 (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
+{
+ return __builtin_mve_vcvtq_m_n_from_f_sv4si (__inactive, __a, __imm6, __p);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_m_n_s16_f16 (int16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
+{
+ return __builtin_mve_vcvtq_m_n_from_f_sv8hi (__inactive, __a, __imm6, __p);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_m_n_u32_f32 (uint32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
+{
+ return __builtin_mve_vcvtq_m_n_from_f_uv4si (__inactive, __a, __imm6, __p);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_m_n_u16_f16 (uint16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
+{
+ return __builtin_mve_vcvtq_m_n_from_f_uv8hi (__inactive, __a, __imm6, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_veorq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_veorq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_veorq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_veorq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmaq_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmaq_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmaq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmaq_m_n_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmaq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmaq_m_n_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmasq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmasq_m_n_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmasq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmasq_m_n_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmsq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmsq_m_fv4sf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vfmsq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
+{
+ return __builtin_mve_vfmsq_m_fv8hf (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmaxnmq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmaxnmq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmaxnmq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmaxnmq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vminnmq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vminnmq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vminnmq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vminnmq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmulq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmulq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmulq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmulq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmulq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmulq_m_n_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmulq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vmulq_m_n_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vornq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vornq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vornq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vorrq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vorrq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vorrq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vorrq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsubq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vsubq_m_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsubq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vsubq_m_fv8hf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsubq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, float32_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vsubq_m_n_fv4sf (__inactive, __a, __b, __p);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vsubq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, float16_t __b, mve_pred16_t __p)
+{
+ return __builtin_mve_vsubq_m_n_fv8hf (__inactive, __a, __b, __p);
+}
#endif
enum {
@@ -14277,6 +14772,10 @@ extern void *__ARM_undef;
#define __arm_vcvtq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcvtq_m_n_f16_s16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcvtq_m_n_f32_s32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_n_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
@@ -14807,6 +15306,303 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
+#define vabdq_m(p0,p1,p2,p3) __arm_vabdq_m(p0,p1,p2,p3)
+#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3)
+#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+
+#define vandq_m(p0,p1,p2,p3) __arm_vandq_m(p0,p1,p2,p3)
+#define __arm_vandq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vandq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vandq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vandq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vandq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vandq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vandq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vandq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vandq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vbicq_m(p0,p1,p2,p3) __arm_vbicq_m(p0,p1,p2,p3)
+#define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vbrsrq_m(p0,p1,p2,p3) __arm_vbrsrq_m(p0,p1,p2,p3)
+#define __arm_vbrsrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbrsrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbrsrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbrsrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbrsrq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbrsrq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3));})
+
+#define vcaddq_rot270_m(p0,p1,p2,p3) __arm_vcaddq_rot270_m(p0,p1,p2,p3)
+#define __arm_vcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot270_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot270_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot270_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcaddq_rot90_m(p0,p1,p2,p3) __arm_vcaddq_rot90_m(p0,p1,p2,p3)
+#define __arm_vcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcaddq_rot90_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot90_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot90_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmlaq_m(p0,p1,p2,p3) __arm_vcmlaq_m(p0,p1,p2,p3)
+#define __arm_vcmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmlaq_rot180_m(p0,p1,p2,p3) __arm_vcmlaq_rot180_m(p0,p1,p2,p3)
+#define __arm_vcmlaq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmlaq_rot270_m(p0,p1,p2,p3) __arm_vcmlaq_rot270_m(p0,p1,p2,p3)
+#define __arm_vcmlaq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmlaq_rot90_m(p0,p1,p2,p3) __arm_vcmlaq_rot90_m(p0,p1,p2,p3)
+#define __arm_vcmlaq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmulq_m(p0,p1,p2,p3) __arm_vcmulq_m(p0,p1,p2,p3)
+#define __arm_vcmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmulq_rot180_m(p0,p1,p2,p3) __arm_vcmulq_rot180_m(p0,p1,p2,p3)
+#define __arm_vcmulq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmulq_rot270_m(p0,p1,p2,p3) __arm_vcmulq_rot270_m(p0,p1,p2,p3)
+#define __arm_vcmulq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot270_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vcmulq_rot90_m(p0,p1,p2,p3) __arm_vcmulq_rot90_m(p0,p1,p2,p3)
+#define __arm_vcmulq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)] [__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_m_f16(__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_m_f32(__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define veorq_m(p0,p1,p2,p3) __arm_veorq_m(p0,p1,p2,p3)
+#define __arm_veorq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_veorq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_veorq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_veorq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_veorq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_veorq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_veorq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vfmaq_m(p0,p1,p2,p3) __arm_vfmaq_m(p0,p1,p2,p3)
+#define __arm_vfmaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+
+#define vfmasq_m(p0,p1,p2,p3) __arm_vfmasq_m(p0,p1,p2,p3)
+#define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+
+#define vfmsq_m(p0,p1,p2,p3) __arm_vfmsq_m(p0,p1,p2,p3)
+#define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmsq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmsq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vmaxnmq_m(p0,p1,p2,p3) __arm_vmaxnmq_m(p0,p1,p2,p3)
+#define __arm_vmaxnmq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmaxnmq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmaxnmq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vminnmq_m(p0,p1,p2,p3) __arm_vminnmq_m(p0,p1,p2,p3)
+#define __arm_vminnmq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vminnmq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vminnmq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3)
+#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+
+#define vornq_m(p0,p1,p2,p3) __arm_vornq_m(p0,p1,p2,p3)
+#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
+#define vsubq_m(p0,p1,p2,p3) __arm_vsubq_m(p0,p1,p2,p3)
+#define __arm_vsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16_t]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32_t]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));})
+
+#define vorrq_m(p0,p1,p2,p3) __arm_vorrq_m(p0,p1,p2,p3)
+#define __arm_vorrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vorrq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vorrq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vorrq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vorrq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vorrq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vorrq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vorrq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
+ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vorrq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
+
#else /* MVE Integer. */
#define vst4q(p0,p1) __arm_vst4q(p0,p1)
@@ -16762,30 +17558,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_veorq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_veorq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define vmaxq_m(p0,p1,p2,p3) __arm_vmaxq_m(p0,p1,p2,p3)
-#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vminq_m(p0,p1,p2,p3) __arm_vminq_m(p0,p1,p2,p3)
-#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define vmladavaq_p(p0,p1,p2,p3) __arm_vmladavaq_p(p0,p1,p2,p3)
#define __arm_vmladavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -16798,66 +17570,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaq_p_u16 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint32_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define vmlaq_m(p0,p1,p2,p3) __arm_vmlaq_m(p0,p1,p2,p3)
-#define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlaq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlaq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlaq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlaq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlaq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlaq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
-
-#define vmlasq_m(p0,p1,p2,p3) __arm_vmlasq_m(p0,p1,p2,p3)
-#define __arm_vmlasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlasq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlasq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlasq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlasq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlasq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlasq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
-
-#define vmulhq_m(p0,p1,p2,p3) __arm_vmulhq_m(p0,p1,p2,p3)
-#define __arm_vmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulhq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulhq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulhq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vmullbq_int_m(p0,p1,p2,p3) __arm_vmullbq_int_m(p0,p1,p2,p3)
-#define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vmulltq_int_m(p0,p1,p2,p3) __arm_vmulltq_int_m(p0,p1,p2,p3)
-#define __arm_vmulltq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define vornq_m(p0,p1,p2,p3) __arm_vornq_m(p0,p1,p2,p3)
#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -16882,14 +17594,43 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vorrq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vorrq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-#define vqdmlahq_m(p0,p1,p2,p3) __arm_vqdmlahq_m(p0,p1,p2,p3)
-#define __arm_vqdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3)
+#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3)
+#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#endif /* MVE Integer. */
#define vqrdmlahq_m(p0,p1,p2,p3) __arm_vqrdmlahq_m(p0,p1,p2,p3)
#define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
@@ -17013,126 +17754,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-#define vaddq_m(p0,p1,p2,p3) __arm_vaddq_m(p0,p1,p2,p3)
-#define __arm_vaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vhaddq_m(p0,p1,p2,p3) __arm_vhaddq_m(p0,p1,p2,p3)
-#define __arm_vhaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vhcaddq_rot270_m(p0,p1,p2,p3) __arm_vhcaddq_rot270_m(p0,p1,p2,p3)
-#define __arm_vhcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vhcaddq_rot90_m(p0,p1,p2,p3) __arm_vhcaddq_rot90_m(p0,p1,p2,p3)
-#define __arm_vhcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vhsubq_m(p0,p1,p2,p3) __arm_vhsubq_m(p0,p1,p2,p3)
-#define __arm_vhsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
-
-#define vmulq_m(p0,p1,p2,p3) __arm_vmulq_m(p0,p1,p2,p3)
-#define __arm_vmulq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmulq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmulq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmulq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vqaddq_m(p0,p1,p2,p3) __arm_vqaddq_m(p0,p1,p2,p3)
-#define __arm_vqaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define vqdmulhq_m(p0,p1,p2,p3) __arm_vqdmulhq_m(p0,p1,p2,p3)
-#define __arm_vqdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
#define vqsubq_m(p0,p1,p2,p3) __arm_vqsubq_m(p0,p1,p2,p3)
#define __arm_vqsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -17181,67 +17802,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-#define vqrdmladhq_m(p0,p1,p2,p3) __arm_vqrdmladhq_m(p0,p1,p2,p3)
-#define __arm_vqrdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vqrdmladhxq_m(p0,p1,p2,p3) __arm_vqrdmladhxq_m(p0,p1,p2,p3)
-#define __arm_vqrdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vmlsdavaxq_p(p0,p1,p2,p3) __arm_vmlsdavaxq_p(p0,p1,p2,p3)
-#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vmlsdavaq_p(p0,p1,p2,p3) __arm_vmlsdavaq_p(p0,p1,p2,p3)
-#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vmladavaxq_p(p0,p1,p2,p3) __arm_vmladavaxq_p(p0,p1,p2,p3)
-#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_p_s8 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_p_s16 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define vmullbq_poly_m(p0,p1,p2,p3) __arm_vmullbq_poly_m(p0,p1,p2,p3)
-#define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
-#define vmulltq_poly_m(p0,p1,p2,p3) __arm_vmulltq_poly_m(p0,p1,p2,p3)
-#define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
#define vshllbq_m(p0,p1,p2,p3) __arm_vshllbq_m(p0,p1,p2,p3)
#define __arm_vshllbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -17301,26 +17861,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-#define vqdmullbq_m(p0,p1,p2,p3) __arm_vqdmullbq_m(p0,p1,p2,p3)
-#define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
-
-#define vqdmulltq_m(p0,p1,p2,p3) __arm_vqdmulltq_m(p0,p1,p2,p3)
-#define __arm_vqdmulltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
#define vqrshrnbq_m(p0,p1,p2,p3) __arm_vqrshrnbq_m(p0,p1,p2,p3)
#define __arm_vqrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -17433,8 +17973,6 @@ extern void *__ARM_undef;
#define vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p(p0,p1,p2,p3)
#define __arm_vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p_s32(p0,p1,p2,p3)
-#endif /* MVE Integer. */
-
#define vqdmladhq_m(p0,p1,p2,p3) __arm_vqdmladhq_m(p0,p1,p2,p3)
#define __arm_vqdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
@@ -17561,6 +18099,264 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
+#define vhaddq_m(p0,p1,p2,p3) __arm_vhaddq_m(p0,p1,p2,p3)
+#define __arm_vhaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vhcaddq_rot270_m(p0,p1,p2,p3) __arm_vhcaddq_rot270_m(p0,p1,p2,p3)
+#define __arm_vhcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot270_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot270_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot270_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vhcaddq_rot90_m(p0,p1,p2,p3) __arm_vhcaddq_rot90_m(p0,p1,p2,p3)
+#define __arm_vhcaddq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhcaddq_rot90_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vhsubq_m(p0,p1,p2,p3) __arm_vhsubq_m(p0,p1,p2,p3)
+#define __arm_vhsubq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vhsubq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhsubq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhsubq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vhsubq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vhsubq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vhsubq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vhsubq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vhsubq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vhsubq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vhsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vhsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vhsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
+
+#define vmaxq_m(p0,p1,p2,p3) __arm_vmaxq_m(p0,p1,p2,p3)
+#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vminq_m(p0,p1,p2,p3) __arm_vminq_m(p0,p1,p2,p3)
+#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vmlaq_m(p0,p1,p2,p3) __arm_vmlaq_m(p0,p1,p2,p3)
+#define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlaq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlaq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlaq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlaq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlaq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlaq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
+
+#define vmlasq_m(p0,p1,p2,p3) __arm_vmlasq_m(p0,p1,p2,p3)
+#define __arm_vmlasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vmlasq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vmlasq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vmlasq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vmlasq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vmlasq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vmlasq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3));})
+
+#define vmulhq_m(p0,p1,p2,p3) __arm_vmulhq_m(p0,p1,p2,p3)
+#define __arm_vmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulhq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulhq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulhq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vmullbq_int_m(p0,p1,p2,p3) __arm_vmullbq_int_m(p0,p1,p2,p3)
+#define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vmulltq_int_m(p0,p1,p2,p3) __arm_vmulltq_int_m(p0,p1,p2,p3)
+#define __arm_vmulltq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vmulltq_poly_m(p0,p1,p2,p3) __arm_vmulltq_poly_m(p0,p1,p2,p3)
+#define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
+
+#define vqaddq_m(p0,p1,p2,p3) __arm_vqaddq_m(p0,p1,p2,p3)
+#define __arm_vqaddq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqaddq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqaddq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqaddq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8_t]: __arm_vqaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16_t]: __arm_vqaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t]: __arm_vqaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqaddq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqaddq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqaddq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqaddq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqaddq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqaddq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+
+#define vqdmlahq_m(p0,p1,p2,p3) __arm_vqdmlahq_m(p0,p1,p2,p3)
+#define __arm_vqdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+
+#define vqdmulhq_m(p0,p1,p2,p3) __arm_vqdmulhq_m(p0,p1,p2,p3)
+#define __arm_vqdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8_t]: __arm_vqdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vqdmullbq_m(p0,p1,p2,p3) __arm_vqdmullbq_m(p0,p1,p2,p3)
+#define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmullbq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmullbq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+
+#define vqdmulltq_m(p0,p1,p2,p3) __arm_vqdmulltq_m(p0,p1,p2,p3)
+#define __arm_vqdmulltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16_t]: __arm_vqdmulltq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32_t]: __arm_vqdmulltq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vqrdmladhq_m(p0,p1,p2,p3) __arm_vqrdmladhq_m(p0,p1,p2,p3)
+#define __arm_vqrdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vqrdmladhxq_m(p0,p1,p2,p3) __arm_vqrdmladhxq_m(p0,p1,p2,p3)
+#define __arm_vqrdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vmlsdavaxq_p(p0,p1,p2,p3) __arm_vmlsdavaxq_p(p0,p1,p2,p3)
+#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vmlsdavaq_p(p0,p1,p2,p3) __arm_vmlsdavaq_p(p0,p1,p2,p3)
+#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vmladavaxq_p(p0,p1,p2,p3) __arm_vmladavaxq_p(p0,p1,p2,p3)
+#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_p_s8 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
+ int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_p_s16 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
+ int (*)[__ARM_mve_type_int32_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
+
+#define vmullbq_poly_m(p0,p1,p2,p3) __arm_vmullbq_poly_m(p0,p1,p2,p3)
+#define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+ __typeof(p1) __p1 = (p1); \
+ __typeof(p2) __p2 = (p2); \
+ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
+ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
+
#ifdef __cplusplus
}
#endif
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 6048591..b448889 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -654,3 +654,34 @@ VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaxq_p_s, v4si)
VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaq_p_s, v4si)
VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaxq_p_s, v4si)
VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaq_p_s, v4si)
+VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_u, v8hi, v4si)
+VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_s, v8hi, v4si)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbrsrq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vorrq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vornq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vminnmq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmaxnmq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmsq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmasq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, veorq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot90_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot270_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot180_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot90_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot270_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot180_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot90_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot270_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbicq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vandq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_n_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_f, v8hf, v4sf)
+VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vabdq_m_f, v8hf, v4sf)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 3dd33d8..bf4eb5d 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -186,7 +186,12 @@
VQDMULLTQ_M_N_S VQDMULLTQ_M_S VQRSHRUNBQ_M_N_S
VQRSHRUNTQ_M_N_SVQSHRUNBQ_M_N_S VQSHRUNTQ_M_N_S
VRMLALDAVHAQ_P_U VRMLALDAVHAXQ_P_S VRMLSLDAVHAQ_P_S
- VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S])
+ VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S
+ VCMLAQ_M_F VCMLAQ_ROT180_M_F VCMLAQ_ROT270_M_F
+ VCMLAQ_ROT90_M_F VCMULQ_M_F VCMULQ_ROT180_M_F
+ VCMULQ_ROT270_M_F VCMULQ_ROT90_M_F VFMAQ_M_F
+ VFMAQ_M_N_F VFMASQ_M_N_F VFMSQ_M_F VMAXNMQ_M_F
+ VMINNMQ_M_F VSUBQ_M_F])
(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF")
(V8HF "V8HI") (V4SF "V4SI")])
@@ -7443,3 +7448,495 @@
"vpst\;vrmlsldavhaxt.s32\t%Q0, %R0, %q2, %q3"
[(set_attr "type" "mve_move")
(set_attr "length""8")])
+;;
+;; [vabdq_m_f])
+;;
+(define_insn "mve_vabdq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VABDQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vabdt.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vaddq_m_f])
+;;
+(define_insn "mve_vaddq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VADDQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vaddt.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vaddq_m_n_f])
+;;
+(define_insn "mve_vaddq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VADDQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vaddt.f%#<V_sz_elem> %q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vandq_m_f])
+;;
+(define_insn "mve_vandq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VANDQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vandt %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vbicq_m_f])
+;;
+(define_insn "mve_vbicq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VBICQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vbict %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vbrsrq_m_n_f])
+;;
+(define_insn "mve_vbrsrq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VBRSRQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vbrsrt.%#<V_sz_elem> %q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcaddq_rot270_m_f])
+;;
+(define_insn "mve_vcaddq_rot270_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCADDQ_ROT270_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcaddt.f%#<V_sz_elem> %q0, %q2, %q3, #270"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcaddq_rot90_m_f])
+;;
+(define_insn "mve_vcaddq_rot90_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCADDQ_ROT90_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcaddt.f%#<V_sz_elem> %q0, %q2, %q3, #90"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmlaq_m_f])
+;;
+(define_insn "mve_vcmlaq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMLAQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #0"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmlaq_rot180_m_f])
+;;
+(define_insn "mve_vcmlaq_rot180_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMLAQ_ROT180_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #180"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmlaq_rot270_m_f])
+;;
+(define_insn "mve_vcmlaq_rot270_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMLAQ_ROT270_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #270"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmlaq_rot90_m_f])
+;;
+(define_insn "mve_vcmlaq_rot90_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMLAQ_ROT90_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmlat.f%#<V_sz_elem> %q0, %q2, %q3, #90"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmulq_m_f])
+;;
+(define_insn "mve_vcmulq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMULQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #0"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmulq_rot180_m_f])
+;;
+(define_insn "mve_vcmulq_rot180_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMULQ_ROT180_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #180"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmulq_rot270_m_f])
+;;
+(define_insn "mve_vcmulq_rot270_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMULQ_ROT270_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #270"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vcmulq_rot90_m_f])
+;;
+(define_insn "mve_vcmulq_rot90_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VCMULQ_ROT90_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vcmult.f%#<V_sz_elem> %q0, %q2, %q3, #90"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [veorq_m_f])
+;;
+(define_insn "mve_veorq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VEORQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;veort %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vfmaq_m_f])
+;;
+(define_insn "mve_vfmaq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VFMAQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vfmat.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vfmaq_m_n_f])
+;;
+(define_insn "mve_vfmaq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VFMAQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vfmat.f%#<V_sz_elem> %q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vfmasq_m_n_f])
+;;
+(define_insn "mve_vfmasq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VFMASQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vfmast.f%#<V_sz_elem> %q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vfmsq_m_f])
+;;
+(define_insn "mve_vfmsq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VFMSQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vfmst.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vmaxnmq_m_f])
+;;
+(define_insn "mve_vmaxnmq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VMAXNMQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vmaxnmt.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vminnmq_m_f])
+;;
+(define_insn "mve_vminnmq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VMINNMQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vminnmt.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vmulq_m_f])
+;;
+(define_insn "mve_vmulq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VMULQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vmult.f%#<V_sz_elem> %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vmulq_m_n_f])
+;;
+(define_insn "mve_vmulq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VMULQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vmult.f%#<V_sz_elem> %q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vornq_m_f])
+;;
+(define_insn "mve_vornq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VORNQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vornt %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vorrq_m_f])
+;;
+(define_insn "mve_vorrq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VORRQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vorrt %q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vsubq_m_f])
+;;
+(define_insn "mve_vsubq_m_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:MVE_0 3 "s_register_operand" "w")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VSUBQ_M_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vsubt.f%#<V_sz_elem>\t%q0, %q2, %q3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
+
+;;
+;; [vsubq_m_n_f])
+;;
+(define_insn "mve_vsubq_m_n_f<mode>"
+ [
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+ (match_operand:MVE_0 2 "s_register_operand" "w")
+ (match_operand:<V_elem> 3 "s_register_operand" "r")
+ (match_operand:HI 4 "vpr_register_operand" "Up")]
+ VSUBQ_M_N_F))
+ ]
+ "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+ "vpst\;vsubt.f%#<V_sz_elem>\t%q0, %q2, %3"
+ [(set_attr "type" "mve_move")
+ (set_attr "length""8")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 650a6b9..4ed6dc7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -2,6 +2,73 @@
Mihail Ionescu <mihail.ionescu@arm.com>
Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+ * gcc.target/arm/mve/intrinsics/vabdq_m_f16.c: New test.
+ * gcc.target/arm/mve/intrinsics/vabdq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vaddq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vaddq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vandq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vandq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vbicq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vbicq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/veorq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/veorq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmulq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmulq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vornq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vornq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vorrq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vorrq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsubq_m_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsubq_m_f32.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c: Likewise.
+ * gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c: Likewise.
+
+2020-03-18 Andre Vieira <andre.simoesdiasvieira@arm.com>
+ Mihail Ionescu <mihail.ionescu@arm.com>
+ Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+
* gcc.target/arm/mve/intrinsics/vmlaldavaq_p_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmlaldavaq_p_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vmlaldavaq_p_u16.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c
new file mode 100644
index 0000000..dac3dd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vabdq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vabdt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vabdq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vabdt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c
new file mode 100644
index 0000000..d1b59ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vabdq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vabdq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vabdt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vabdq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vabdt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c
new file mode 100644
index 0000000..b52b719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vaddq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vaddq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c
new file mode 100644
index 0000000..c3e7c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vaddq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vaddq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c
new file mode 100644
index 0000000..94d6b6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vaddq_m_n_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vaddq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c
new file mode 100644
index 0000000..ab100cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vaddq_m_n_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vaddq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vaddt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c
new file mode 100644
index 0000000..c641fce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vandq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vandt" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vandq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vandt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c
new file mode 100644
index 0000000..a3344d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vandq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vandq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vandt" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vandq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vandt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c
new file mode 100644
index 0000000..f3237a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vbicq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbict" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vbicq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbict" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c
new file mode 100644
index 0000000..975d9db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbicq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vbicq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbict" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vbicq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbict" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c
new file mode 100644
index 0000000..afb5ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, int32_t b, mve_pred16_t p)
+{
+ return vbrsrq_m_n_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbrsrt.16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, int32_t b, mve_pred16_t p)
+{
+ return vbrsrq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbrsrt.16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c
new file mode 100644
index 0000000..3b612cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vbrsrq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, int32_t b, mve_pred16_t p)
+{
+ return vbrsrq_m_n_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbrsrt.32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, int32_t b, mve_pred16_t p)
+{
+ return vbrsrq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vbrsrt.32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c
new file mode 100644
index 0000000..52af15c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcaddq_rot270_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcaddq_rot270_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c
new file mode 100644
index 0000000..b5fb2e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot270_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcaddq_rot270_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcaddq_rot270_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c
new file mode 100644
index 0000000..8db12c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcaddq_rot90_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcaddq_rot90_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c
new file mode 100644
index 0000000..fae494c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcaddq_rot90_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcaddq_rot90_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcaddq_rot90_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcaddt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c
new file mode 100644
index 0000000..3f86c8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c
new file mode 100644
index 0000000..3cc3342
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c
new file mode 100644
index 0000000..197d8ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot180_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot180_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c
new file mode 100644
index 0000000..3bd195f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot180_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot180_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot180_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c
new file mode 100644
index 0000000..29e8638
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot270_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot270_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c
new file mode 100644
index 0000000..a4ca602
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot270_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot270_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot270_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c
new file mode 100644
index 0000000..55afaf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot90_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot90_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c
new file mode 100644
index 0000000..89be04b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmlaq_rot90_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot90_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vcmlaq_rot90_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmlat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c
new file mode 100644
index 0000000..c83be6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c
new file mode 100644
index 0000000..acea3b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c
new file mode 100644
index 0000000..78e7606
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot180_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot180_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c
new file mode 100644
index 0000000..8643de0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot180_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot180_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot180_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c
new file mode 100644
index 0000000..632e21c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot270_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot270_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c
new file mode 100644
index 0000000..e5f35de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot270_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot270_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot270_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c
new file mode 100644
index 0000000..dfd5d0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot90_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vcmulq_rot90_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c
new file mode 100644
index 0000000..7b87791
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmulq_rot90_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot90_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vcmulq_rot90_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c
new file mode 100644
index 0000000..62c3086
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s16_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n_s16_f16 (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.s16.f16" } } */
+
+int16x8_t
+foo1 (int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.s16.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c
new file mode 100644
index 0000000..0eeba94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_s32_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n_s32_f32 (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.s32.f32" } } */
+
+int32x4_t
+foo1 (int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.s32.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c
new file mode 100644
index 0000000..fbc3b9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u16_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (uint16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n_u16_f16 (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.u16.f16" } } */
+
+uint16x8_t
+foo1 (uint16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.u16.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c
new file mode 100644
index 0000000..b7719de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_m_n_u32_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (uint32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n_u32_f32 (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.u32.f32" } } */
+
+uint32x4_t
+foo1 (uint32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_m_n (inactive, a, 1, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vcvtt.u32.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c
new file mode 100644
index 0000000..8a70933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return veorq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "veort" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return veorq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "veort" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c
new file mode 100644
index 0000000..37ab556
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/veorq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return veorq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "veort" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return veorq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "veort" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c
new file mode 100644
index 0000000..f27d664
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vfmaq_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vfmaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c
new file mode 100644
index 0000000..7e7d38f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vfmaq_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vfmaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c
new file mode 100644
index 0000000..93c8aa3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p)
+{
+ return vfmaq_m_n_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p)
+{
+ return vfmaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c
new file mode 100644
index 0000000..1f9189a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p)
+{
+ return vfmaq_m_n_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p)
+{
+ return vfmaq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmat.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c
new file mode 100644
index 0000000..4f91179
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p)
+{
+ return vfmasq_m_n_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmast.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p)
+{
+ return vfmasq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmast.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c
new file mode 100644
index 0000000..e630f44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p)
+{
+ return vfmasq_m_n_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmast.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p)
+{
+ return vfmasq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmast.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c
new file mode 100644
index 0000000..2cda2e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vfmsq_m_f16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmst.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p)
+{
+ return vfmsq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmst.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c
new file mode 100644
index 0000000..773edf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmsq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vfmsq_m_f32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmst.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p)
+{
+ return vfmsq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vfmst.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c
new file mode 100644
index 0000000..43858dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vmaxnmq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmaxnmt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vmaxnmq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmaxnmt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c
new file mode 100644
index 0000000..a3b7e8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vmaxnmq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmaxnmt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vmaxnmq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmaxnmt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c
new file mode 100644
index 0000000..d01e266
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vminnmq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vminnmt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vminnmq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vminnmt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c
new file mode 100644
index 0000000..5193b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vminnmq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vminnmt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vminnmq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vminnmt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c
new file mode 100644
index 0000000..43b751b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vmulq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c
new file mode 100644
index 0000000..6dee764
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vmulq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c
new file mode 100644
index 0000000..a0b2d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vmulq_m_n_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c
new file mode 100644
index 0000000..c457195
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vmulq_m_n_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vmulq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vmult.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c
new file mode 100644
index 0000000..9833268
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vornq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vornt" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vornq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vornt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c
new file mode 100644
index 0000000..48a720a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vornq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vornq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vornt" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vornq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vornt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c
new file mode 100644
index 0000000..6e8591e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vorrq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vorrt" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vorrq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vorrt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c
new file mode 100644
index 0000000..09ee673
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vorrq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vorrq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vorrt" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vorrq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vorrt" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c
new file mode 100644
index 0000000..383491d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vsubq_m_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+ return vsubq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c
new file mode 100644
index 0000000..327b524
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vsubq_m_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+ return vsubq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c
new file mode 100644
index 0000000..5657c36
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float16x8_t
+foo (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vsubq_m_n_f16 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f16" } } */
+
+float16x8_t
+foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p)
+{
+ return vsubq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c
new file mode 100644
index 0000000..c9502cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+float32x4_t
+foo (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vsubq_m_n_f32 (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f32" } } */
+
+float32x4_t
+foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p)
+{
+ return vsubq_m (inactive, a, b, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vsubt.f32" } } */