author     Tamar Christina <tamar.christina@arm.com>    2020-12-13 13:59:33 +0000
committer  Tamar Christina <tamar.christina@arm.com>    2020-12-13 14:12:34 +0000
commit     3b8a82f97dd48e153ce93b317c44254839e11461 (patch)
tree       2f34b1dc783f32ac5c0d3d6dc6033d6f68e5d69e /gcc
parent     2f05dadaeda6068ad570117be21f2c16e2f4fa10 (diff)
Arm: Add NEON and MVE RTL patterns for Complex Addition, Multiply and FMA.
This adds the implementation of the optabs for complex addition.  With this
the following C code:

    void f90 (float complex a[restrict N], float complex b[restrict N],
              float complex c[restrict N])
    {
      for (int i=0; i < N; i++)
        c[i] = a[i] + (b[i] * I);
    }

generates

    f90:
            add     r3, r2, #1600
    .L2:
            vld1.32 {q8}, [r0]!
            vld1.32 {q9}, [r1]!
            vcadd.f32       q8, q8, q9, #90
            vst1.32 {q8}, [r2]!
            cmp     r3, r2
            bne     .L2
            bx      lr

instead of

    f90:
            add     r3, r2, #1600
    .L2:
            vld2.32 {d24-d27}, [r0]!
            vld2.32 {d20-d23}, [r1]!
            vsub.f32        q8, q12, q11
            vadd.f32        q9, q13, q10
            vst2.32 {d16-d19}, [r2]!
            cmp     r3, r2
            bne     .L2
            bx      lr

gcc/ChangeLog:

        * config/arm/arm_mve.h (__arm_vcaddq_rot90_u8,
        __arm_vcaddq_rot270_u8, __arm_vcaddq_rot90_s8,
        __arm_vcaddq_rot270_s8, __arm_vcaddq_rot90_u16,
        __arm_vcaddq_rot270_u16, __arm_vcaddq_rot90_s16,
        __arm_vcaddq_rot270_s16, __arm_vcaddq_rot90_u32,
        __arm_vcaddq_rot270_u32, __arm_vcaddq_rot90_s32,
        __arm_vcaddq_rot270_s32, __arm_vcmulq_rot90_f16,
        __arm_vcmulq_rot270_f16, __arm_vcmulq_rot180_f16, __arm_vcmulq_f16,
        __arm_vcaddq_rot90_f16, __arm_vcaddq_rot270_f16,
        __arm_vcmulq_rot90_f32, __arm_vcmulq_rot270_f32,
        __arm_vcmulq_rot180_f32, __arm_vcmulq_f32, __arm_vcaddq_rot90_f32,
        __arm_vcaddq_rot270_f32, __arm_vcmlaq_f16, __arm_vcmlaq_rot180_f16,
        __arm_vcmlaq_rot270_f16, __arm_vcmlaq_rot90_f16, __arm_vcmlaq_f32,
        __arm_vcmlaq_rot180_f32, __arm_vcmlaq_rot270_f32,
        __arm_vcmlaq_rot90_f32): Update builtin calls.
        * config/arm/arm_mve_builtins.def (vcaddq_rot90_u, vcaddq_rot270_u,
        vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f, vcaddq_rot270_f,
        vcmulq_f, vcmulq_rot90_f, vcmulq_rot180_f, vcmulq_rot270_f,
        vcmlaq_f, vcmlaq_rot90_f, vcmlaq_rot180_f, vcmlaq_rot270_f): Removed.
        (vcaddq_rot90, vcaddq_rot270, vcmulq, vcmulq_rot90, vcmulq_rot180,
        vcmulq_rot270, vcmlaq, vcmlaq_rot90, vcmlaq_rot180,
        vcmlaq_rot270): New.
        * config/arm/constraints.md (Dz): Include MVE.
        * config/arm/iterators.md (mve_rotsplit1, mve_rotsplit2): New.
        (rot): Add UNSPEC_VCMLS, UNSPEC_VCMUL and UNSPEC_VCMUL180.
        (rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
        * config/arm/mve.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S,
        VCADDQ_ROT270_U, VCADDQ_ROT90_U, VCADDQ_ROT270_F, VCADDQ_ROT90_F,
        VCMULQ_F, VCMULQ_ROT180_F, VCMULQ_ROT270_F, VCMULQ_ROT90_F,
        VCMLAQ_F, VCMLAQ_ROT180_F, VCMLAQ_ROT90_F, VCMLAQ_ROT270_F,
        VCADDQ_ROT270, VCADDQ_ROT90): Removed.
        (mve_rot, VCMUL): New.
        (mve_vcaddq_rot270_<supf><mode>, mve_vcaddq_rot90_<supf><mode>,
        mve_vcaddq_rot270_f<mode>, mve_vcaddq_rot90_f<mode>,
        mve_vcmulq_f<mode>, mve_vcmulq_rot180_f<mode>,
        mve_vcmulq_rot270_f<mode>, mve_vcmulq_rot90_f<mode>,
        mve_vcmlaq_f<mode>, mve_vcmlaq_rot180_f<mode>,
        mve_vcmlaq_rot270_f<mode>, mve_vcmlaq_rot90_f<mode>): Removed.
        (mve_vcmlaq<mve_rot><mode>, mve_vcmulq<mve_rot><mode>,
        mve_vcaddq<mve_rot><mode>, cadd<rot><mode>3): New.
        (cmul<rot_op><mode>3): Exclude MVE types.
        * config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL90,
        UNSPEC_VCMUL180, UNSPEC_VCMUL270, UNSPEC_VCMLS,
        UNSPEC_VCMLS180): New.
        * config/arm/vec-common.md (cadd<rot><mode>3, cmul<rot_op><mode>3,
        arm_vcmla<rot><mode>, cml<fcmac1><rot_op><mode>4): New.
        * config/arm/neon.md (cmul<rot_op><mode>3): New.
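As a companion to the addition example above, the new cml (complex
multiply-accumulate) expanders added in vec-common.md target loops of the
shape below.  This is an illustrative sketch only; N, the flags (e.g. -Ofast)
and the exact code generated are assumptions, not taken from the commit:

    #include <complex.h>

    #define N 200

    /* Complex FMA: with the new patterns the vectorizer can emit a
       vcmla #0 / vcmla #90 pair per vector instead of an open-coded
       vld2/vst2 based sequence.  */
    void
    fma0 (float complex a[restrict N], float complex b[restrict N],
          float complex c[restrict N])
    {
      for (int i = 0; i < N; i++)
        c[i] += a[i] * b[i];
    }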
Diffstat (limited to 'gcc')
-rw-r--r--   gcc/config/arm/arm_mve.h               70
-rw-r--r--   gcc/config/arm/arm_mve_builtins.def    26
-rw-r--r--   gcc/config/arm/constraints.md           2
-rw-r--r--   gcc/config/arm/iterators.md            44
-rw-r--r--   gcc/config/arm/mve.md                 172
-rw-r--r--   gcc/config/arm/neon.md                 20
-rw-r--r--   gcc/config/arm/unspecs.md              20
-rw-r--r--   gcc/config/arm/vec-common.md           70
8 files changed, 216 insertions, 208 deletions
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 6c0d1e2..4501462 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
+ return (uint8x16_t)
+ __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
+ return (uint8x16_t)
+ __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b);
}
__extension__ extern __inline uint8x16_t
@@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
@@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b);
+ return (uint16x8_t)
+ __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b);
+ return (uint16x8_t)
+ __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b);
}
__extension__ extern __inline uint16x8_t
@@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
@@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv4si (__a, __b);
+ return (uint32x4_t)
+ __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv4si (__a, __b);
+ return (uint32x4_t)
+ __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b);
}
__extension__ extern __inline uint32x4_t
@@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
@@ -17342,42 +17348,42 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot90v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot270v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot180v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_fv8hf (__a, __b);
+ return __builtin_mve_vcmulqv8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
@@ -17594,42 +17600,42 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot90v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot270v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot180v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_fv4sf (__a, __b);
+ return __builtin_mve_vcmulqv4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
@@ -17784,28 +17790,28 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaqv8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot180_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot270_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot90_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
@@ -18092,28 +18098,28 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaqv4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot180_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot270_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot90_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
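The user-facing intrinsics above keep their names and signatures; only the
builtin each wrapper calls changes (the unsigned variants now cast to the
corresponding signed vector type and share a single builtin per mode).  A
minimal usage sketch, assuming an MVE-FP target such as
-march=armv8.1-m.main+mve.fp -mfloat-abi=hard (flags are illustrative, not
part of the patch):

    #include <arm_mve.h>

    uint8x16_t
    rotated_add (uint8x16_t a, uint8x16_t b)
    {
      /* Now implemented via a cast to int8x16_t and the shared
         __builtin_mve_vcaddq_rot90v16qi builtin.  */
      return vcaddq_rot90_u8 (a, b);
    }

    float32x4_t
    rotated_mla (float32x4_t acc, float32x4_t a, float32x4_t b)
    {
      return vcmlaq_rot90_f32 (acc, a, b);
    }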
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index f38926f..56b652f 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
@@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si)
@@ -264,12 +260,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf)
@@ -470,10 +460,6 @@ VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmsq_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmasq_n_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_n_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrntq_n_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrnbq_n_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrntq_n_s, v8hi, v4si)
@@ -892,3 +878,15 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si)
+
+/* optabs without any suffixes. */
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf)
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq, v8hf, v4sf)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 789e333..6ebddb9 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -310,7 +310,7 @@
"@internal
In ARM/Thumb-2 state a vector of constant zeros."
(and (match_code "const_vector")
- (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+ (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
(define_constraint "Da"
"@internal
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5fcb7af..5fc75cb 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1177,11 +1177,40 @@
(define_int_attr rot [(UNSPEC_VCADD90 "90")
(UNSPEC_VCADD270 "270")
+ (UNSPEC_VCMLS "0")
(UNSPEC_VCMLA "0")
(UNSPEC_VCMLA90 "90")
(UNSPEC_VCMLA180 "180")
(UNSPEC_VCMLA270 "270")])
+(define_int_attr mve_rotsplit1 [(UNSPEC_VCMLA "")
+ (UNSPEC_VCMLA180 "")
+ (UNSPEC_VCMUL "")
+ (UNSPEC_VCMUL180 "")
+ (UNSPEC_VCMLS "_rot270")
+ (UNSPEC_VCMLS180 "_rot90")])
+
+(define_int_attr mve_rotsplit2 [(UNSPEC_VCMLA "_rot90")
+ (UNSPEC_VCMLA180 "_rot270")
+ (UNSPEC_VCMUL "_rot90")
+ (UNSPEC_VCMUL180 "_rot270")
+ (UNSPEC_VCMLS "_rot180")
+ (UNSPEC_VCMLS180 "_rot180")])
+
+(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
+ (UNSPEC_VCADD270 "_rot270")
+ (UNSPEC_VCMLA "")
+ (UNSPEC_VCMLA90 "_rot90")
+ (UNSPEC_VCMLA180 "_rot180")
+ (UNSPEC_VCMLA270 "_rot270")
+ (UNSPEC_VCMUL "")
+ (UNSPEC_VCMUL90 "_rot90")
+ (UNSPEC_VCMUL180 "_rot180")
+ (UNSPEC_VCMUL270 "_rot270")])
+
+(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90
+ UNSPEC_VCMUL180 UNSPEC_VCMUL270])
+
(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
(UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
@@ -1233,9 +1262,8 @@
(VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
(VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s") (VBICQ_U "u")
- (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
- (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
- (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
+ (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
+ (VCMPEQQ_S "s") (VCMPEQQ_U "u")
(VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
(VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
(VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
@@ -1502,8 +1530,6 @@
(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
-(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
-(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
@@ -1712,3 +1738,11 @@
(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
+;; Define iterators for VCMLA operations
+(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
+ UNSPEC_VCMLA180
+ UNSPEC_VCMLS])
+
+;; Define iterators for VCMLA operations as MUL
+(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
+ UNSPEC_VCMUL180])
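The mve_rotsplit1/mve_rotsplit2 attributes record how each fused complex
operation splits into two partial instructions.  A scalar sketch of the
VCMLA rotation semantics this split relies on (illustrative C following the
architectural definition; not part of the patch):

    /* One complex element; acc, a and b are {real, imag} pairs.  */
    void
    vcmla_rot0 (float acc[2], const float a[2], const float b[2])
    {
      acc[0] += a[0] * b[0];   /* re += a.re * b.re */
      acc[1] += a[0] * b[1];   /* im += a.re * b.im */
    }

    void
    vcmla_rot90 (float acc[2], const float a[2], const float b[2])
    {
      acc[0] -= a[1] * b[1];   /* re -= a.im * b.im */
      acc[1] += a[1] * b[0];   /* im += a.im * b.re */
    }

    /* rot0 followed by rot90 on a zeroed accumulator is a full complex
       multiply, hence VCMUL splits into "" and "_rot90"; the subtracting
       VCMLS form splits into "_rot270" and "_rot180" instead.  */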
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4b2e46a..d29f387 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -950,34 +950,28 @@
])
;;
-;; [vcaddq_rot270_s, vcaddq_rot270_u])
+;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
;;
-(define_insn "mve_vcaddq_rot270_<supf><mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT270))
+ VCADD))
]
"TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270"
+ "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
-;;
-;; [vcaddq_rot90_u, vcaddq_rot90_s])
-;;
-(define_insn "mve_vcaddq_rot90_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT90))
- ]
- "TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90"
- [(set_attr "type" "mve_move")
-])
+;; Auto vectorizer pattern for int vcadd
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:MVE_2 0 "register_operand")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand")
+ (match_operand:MVE_2 2 "register_operand")]
+ VCADD))]
+ "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN"
+)
;;
;; [vcmpcsq_n_u])
@@ -2084,32 +2078,17 @@
])
;;
-;; [vcaddq_rot270_f])
-;;
-(define_insn "mve_vcaddq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcaddq_rot90_f])
+;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
;;
-(define_insn "mve_vcaddq_rot90_f<mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT90_F))
+ VCADD))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90"
+ "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -2294,62 +2273,17 @@
])
;;
-;; [vcmulq_f])
-;;
-(define_insn "mve_vcmulq_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #0"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot180_f])
-;;
-(define_insn "mve_vcmulq_rot180_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT180_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #180"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot270_f])
-;;
-(define_insn "mve_vcmulq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot90_f])
+;; [vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270])
;;
-(define_insn "mve_vcmulq_rot90_f<mode>"
+(define_insn "mve_vcmulq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT90_F))
+ VCMUL))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #90"
+ "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -4122,66 +4056,20 @@
[(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
-;; [vcmlaq_f])
-;;
-(define_insn "mve_vcmlaq_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #0"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot180_f])
-;;
-(define_insn "mve_vcmlaq_rot180_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT180_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #180"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot270_f])
-;;
-(define_insn "mve_vcmlaq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot90_f])
+;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270])
;;
-(define_insn "mve_vcmlaq_rot90_f<mode>"
+(define_insn "mve_vcmlaq<mve_rot><mode>"
[
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT90_F))
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w,w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz")
+ (match_operand:MVE_0 2 "s_register_operand" "w,w")
+ (match_operand:MVE_0 3 "s_register_operand" "w,w")]
+ VCMLA))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #90"
+ "@
+ vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
+ vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
[(set_attr "type" "mve_move")
])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 669c34d..487c0a1 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3030,6 +3030,26 @@
[(set_attr "type" "neon_fcmla")]
)
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+ [(set (match_operand:VDF 0 "register_operand")
+ (unspec:VDF [(match_operand:VDF 1 "register_operand")
+ (match_operand:VDF 2 "register_operand")]
+ VCMUL_OP))]
+ "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ rtx res1 = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
+ operands[1], operands[2]));
+ emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
+ operands[1], operands[2]));
+ DONE;
+})
+
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "neon_<sup>dot<vsi2qi>"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index c2076c9..8bb0060 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -510,6 +510,12 @@
UNSPEC_VCMLA90
UNSPEC_VCMLA180
UNSPEC_VCMLA270
+ UNSPEC_VCMUL
+ UNSPEC_VCMUL90
+ UNSPEC_VCMUL180
+ UNSPEC_VCMUL270
+ UNSPEC_VCMLS
+ UNSPEC_VCMLS180
UNSPEC_MATMUL_S
UNSPEC_MATMUL_U
UNSPEC_MATMUL_US
@@ -603,8 +609,6 @@
VADDVQ_P_S
VBICQ_S
VBRSRQ_N_S
- VCADDQ_ROT270_S
- VCADDQ_ROT90_S
VCMPEQQ_S
VCMPEQQ_N_S
VCMPNEQ_N_S
@@ -648,8 +652,6 @@
VADDVQ_P_U
VBICQ_U
VBRSRQ_N_U
- VCADDQ_ROT270_U
- VCADDQ_ROT90_U
VCMPEQQ_U
VCMPEQQ_N_U
VCMPNEQ_N_U
@@ -718,8 +720,6 @@
VABDQ_F
VADDQ_N_F
VBICQ_F
- VCADDQ_ROT270_F
- VCADDQ_ROT90_F
VCMPEQQ_F
VCMPEQQ_N_F
VCMPGEQ_F
@@ -732,10 +732,6 @@
VCMPLTQ_N_F
VCMPNEQ_F
VCMPNEQ_N_F
- VCMULQ_F
- VCMULQ_ROT180_F
- VCMULQ_ROT270_F
- VCMULQ_ROT90_F
VEORQ_F
VMAXNMAQ_F
VMAXNMAVQ_F
@@ -908,7 +904,6 @@
VMLSLDAVAQ_S
VQSHRUNBQ_N_S
VQRSHRUNTQ_N_S
- VCMLAQ_F
VMINNMAQ_M_F
VFMASQ_N_F
VDUPQ_M_N_F
@@ -930,14 +925,12 @@
VADDLVAQ_P_S
VQMOVUNBQ_M_S
VCMPLEQ_M_F
- VCMLAQ_ROT180_F
VMLSLDAVAXQ_S
VRNDXQ_M_F
VFMSQ_F
VMINNMVQ_P_F
VMAXNMVQ_P_F
VPSELQ_F
- VCMLAQ_ROT90_F
VQMOVUNTQ_M_S
VREV64Q_M_F
VNEGQ_M_F
@@ -950,7 +943,6 @@
VRMLALDAVHQ_P_S
VRMLALDAVHXQ_P_S
VCMPEQQ_M_N_F
- VCMLAQ_ROT270_F
VMAXNMAQ_M_F
VRNDQ_M_F
VMLALDAVQ_P_U
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 8d9c89c..7843059 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -186,3 +186,73 @@
(match_operand:VDQ 2 "neon_logic_op2" "")))]
"ARM_HAVE_<MODE>_ARITH"
)
+
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF [(match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand")]
+ VCADD))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+ [(set (match_operand:VQ_HSF 0 "register_operand")
+ (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
+ (match_operand:VQ_HSF 2 "register_operand")]
+ VCMUL_OP))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT))
+ && !BYTES_BIG_ENDIAN"
+{
+ rtx res1 = gen_reg_rtx (<MODE>mode);
+ if (TARGET_COMPLEX)
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
+ operands[1], operands[2]));
+ emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
+ operands[1], operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_mve_vcmulq<mve_rotsplit1><mode> (operands[0], operands[1],
+ operands[2]));
+ emit_insn (gen_mve_vcmulq<mve_rotsplit2><mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+})
+
+(define_expand "arm_vcmla<rot><mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA)))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA_OP)))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1],
+ operands[2], operands[3]));
+ emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp,
+ operands[2], operands[3]));
+ DONE;
+})
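The cml<fcmac1><rot_op><mode>4 expander covers both the accumulating (VCMLA)
and the subtracting (VCMLS) forms, each split into two vcmla instructions
chained through a temporary.  A sketch of a loop that can, in principle, map
to the subtracting form (illustrative only; whether the vectorizer matches it
depends on the usual fast-math style options, which are an assumption here):

    #include <complex.h>

    #define N 200

    void
    fms0 (float complex a[restrict N], float complex b[restrict N],
          float complex c[restrict N])
    {
      for (int i = 0; i < N; i++)
        c[i] -= a[i] * b[i];
    }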