diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/arm/arm_mve.h | 70 | ||||
-rw-r--r-- | gcc/config/arm/arm_mve_builtins.def | 26 | ||||
-rw-r--r-- | gcc/config/arm/constraints.md | 2 | ||||
-rw-r--r-- | gcc/config/arm/iterators.md | 44 | ||||
-rw-r--r-- | gcc/config/arm/mve.md | 172 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 20 | ||||
-rw-r--r-- | gcc/config/arm/unspecs.md | 20 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 70 |
8 files changed, 216 insertions, 208 deletions
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 6c0d1e2..4501462 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t @@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot90v16qi (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot270v16qi (__a, __b); } __extension__ extern __inline int8x16_t @@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b); + return (uint16x8_t) + __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b) { - return 
__builtin_mve_vcaddq_rot270_uv8hi (__a, __b); + return (uint16x8_t) + __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t @@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot90v8hi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot270v8hi (__a, __b); } __extension__ extern __inline int16x8_t @@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot90_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot270_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t @@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_mve_vcaddq_rot90_sv4si (__a, __b); + return __builtin_mve_vcaddq_rot90v4si (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_mve_vcaddq_rot270_sv4si (__a, __b); + 
return __builtin_mve_vcaddq_rot270v4si (__a, __b); } __extension__ extern __inline int32x4_t @@ -17342,42 +17348,42 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot90v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot270v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b); + return __builtin_mve_vcmulq_rot180v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcmulq_fv8hf (__a, __b); + return __builtin_mve_vcmulqv8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot90v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot270v8hf (__a, __b); } __extension__ extern __inline float16x8_t @@ -17594,42 +17600,42 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot90_f32 (float32x4_t __a, 
float32x4_t __b) { - return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot90v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot270v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b); + return __builtin_mve_vcmulq_rot180v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcmulq_fv4sf (__a, __b); + return __builtin_mve_vcmulqv4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot90v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot270v4sf (__a, __b); } __extension__ extern __inline float32x4_t @@ -17784,28 +17790,28 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaqv8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot180_f16 
(float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot180_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot270_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) { - return __builtin_mve_vcmlaq_rot90_fv8hf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c); } __extension__ extern __inline float16x8_t @@ -18092,28 +18098,28 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaqv4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot180_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot270_fv4sf (__a, __b, __c); + return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __builtin_mve_vcmlaq_rot90_fv4sf (__a, __b, 
__c); + return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c); } __extension__ extern __inline float32x4_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index f38926f..56b652f 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) @@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si) @@ -264,12 +260,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, 
vcaddq_rot270_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf) @@ -470,10 +460,6 @@ VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmsq_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmasq_n_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_n_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrntq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrnbq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrntq_n_s, v8hi, v4si) @@ -892,3 +878,15 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si) VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si) + +/* optabs without any suffixes. 
*/ +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf) +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180, v8hf, v4sf) +VAR2 (BINOP_NONE_NONE_NONE, vcmulq, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq, v8hf, v4sf) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 789e333..6ebddb9 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -310,7 +310,7 @@ "@internal In ARM/Thumb-2 state a vector of constant zeros." (and (match_code "const_vector") - (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)"))) (define_constraint "Da" "@internal diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 5fcb7af..5fc75cb 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1177,11 +1177,40 @@ (define_int_attr rot [(UNSPEC_VCADD90 "90") (UNSPEC_VCADD270 "270") + (UNSPEC_VCMLS "0") (UNSPEC_VCMLA "0") (UNSPEC_VCMLA90 "90") (UNSPEC_VCMLA180 "180") (UNSPEC_VCMLA270 "270")]) +(define_int_attr mve_rotsplit1 [(UNSPEC_VCMLA "") + (UNSPEC_VCMLA180 "") + (UNSPEC_VCMUL "") + (UNSPEC_VCMUL180 "") + (UNSPEC_VCMLS "_rot270") + (UNSPEC_VCMLS180 "_rot90")]) + +(define_int_attr mve_rotsplit2 [(UNSPEC_VCMLA "_rot90") + (UNSPEC_VCMLA180 "_rot270") + (UNSPEC_VCMUL "_rot90") + (UNSPEC_VCMUL180 "_rot270") + (UNSPEC_VCMLS "_rot180") + (UNSPEC_VCMLS180 "_rot180")]) + +(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90") + (UNSPEC_VCADD270 "_rot270") + (UNSPEC_VCMLA "") + (UNSPEC_VCMLA90 "_rot90") + (UNSPEC_VCMLA180 "_rot180") + 
(UNSPEC_VCMLA270 "_rot270") + (UNSPEC_VCMUL "") + (UNSPEC_VCMUL90 "_rot90") + (UNSPEC_VCMUL180 "_rot180") + (UNSPEC_VCMUL270 "_rot270")]) + +(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90 + UNSPEC_VCMUL180 UNSPEC_VCMUL270]) + (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8") (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8") (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8") @@ -1233,9 +1262,8 @@ (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s") (VBICQ_U "u") - (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") - (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") - (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") + (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") + (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u") (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") @@ -1502,8 +1530,6 @@ (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) (define_int_iterator VBICQ [VBICQ_S VBICQ_U]) (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) -(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) @@ -1712,3 +1738,11 @@ (define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48]) (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48]) (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U]) +;; Define iterators for VCMLA operations +(define_int_iterator VCMLA_OP [UNSPEC_VCMLA + UNSPEC_VCMLA180 + UNSPEC_VCMLS]) + +;; Define iterators for VCMLA operations as MUL +(define_int_iterator VCMUL_OP [UNSPEC_VCMUL + UNSPEC_VCMUL180]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 4b2e46a..d29f387 100644 --- a/gcc/config/arm/mve.md +++ 
b/gcc/config/arm/mve.md @@ -950,34 +950,28 @@ ]) ;; -;; [vcaddq_rot270_s, vcaddq_rot270_u]) +;; [vcaddq_rot90, vcaddq_rot270]) ;; -(define_insn "mve_vcaddq_rot270_<supf><mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT270)) + VCADD)) ] "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270" + "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) -;; -;; [vcaddq_rot90_u, vcaddq_rot90_s]) -;; -(define_insn "mve_vcaddq_rot90_<supf><mode>" - [ - (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT90)) - ] - "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90" - [(set_attr "type" "mve_move") -]) +;; Auto vectorizer pattern for int vcadd +(define_expand "cadd<rot><mode>3" + [(set (match_operand:MVE_2 0 "register_operand") + (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand") + (match_operand:MVE_2 2 "register_operand")] + VCADD))] + "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN" +) ;; ;; [vcmpcsq_n_u]) @@ -2084,32 +2078,17 @@ ]) ;; -;; [vcaddq_rot270_f]) -;; -(define_insn "mve_vcaddq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcaddq_rot90_f]) +;; [vcaddq_rot90, vcaddq_rot270]) ;; -(define_insn "mve_vcaddq_rot90_f<mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" 
"<earlyclobber_32>") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT90_F)) + VCADD)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90" + "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) @@ -2294,62 +2273,17 @@ ]) ;; -;; [vcmulq_f]) -;; -(define_insn "mve_vcmulq_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #0" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot180_f]) -;; -(define_insn "mve_vcmulq_rot180_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT180_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #180" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot270_f]) -;; -(define_insn "mve_vcmulq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmulq_rot90_f]) +;; [vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270]) ;; -(define_insn "mve_vcmulq_rot90_f<mode>" +(define_insn "mve_vcmulq<mve_rot><mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w")] - VCMULQ_ROT90_F)) + VCMUL)) ] "TARGET_HAVE_MVE && 
TARGET_HAVE_MVE_FLOAT" - "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #90" + "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) @@ -4122,66 +4056,20 @@ [(set_attr "type" "mve_move") (set_attr "length""8")]) ;; -;; [vcmlaq_f]) -;; -(define_insn "mve_vcmlaq_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #0" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot180_f]) -;; -(define_insn "mve_vcmlaq_rot180_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT180_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #180" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot270_f]) -;; -(define_insn "mve_vcmlaq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcmlaq_rot90_f]) +;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270]) ;; -(define_insn "mve_vcmlaq_rot90_f<mode>" +(define_insn "mve_vcmlaq<mve_rot><mode>" [ - (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") - (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:MVE_0 3 "s_register_operand" "w")] - VCMLAQ_ROT90_F)) + (set 
(match_operand:MVE_0 0 "s_register_operand" "=w,w") + (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz") + (match_operand:MVE_0 2 "s_register_operand" "w,w") + (match_operand:MVE_0 3 "s_register_operand" "w,w")] + VCMLA)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #90" + "@ + vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot> + vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>" [(set_attr "type" "mve_move") ]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 669c34d..487c0a1 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3030,6 +3030,26 @@ [(set_attr "type" "neon_fcmla")] ) +;; The complex mul operations always need to expand to two instructions. +;; The first operation does half the computation and the second does the +;; remainder. Because of this, expand early. +(define_expand "cmul<rot_op><mode>3" + [(set (match_operand:VDF 0 "register_operand") + (unspec:VDF [(match_operand:VDF 1 "register_operand") + (match_operand:VDF 2 "register_operand")] + VCMUL_OP))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + rtx res1 = gen_reg_rtx (<MODE>mode); + emit_move_insn (tmp, CONST0_RTX (<MODE>mode)); + emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp, + operands[1], operands[2])); + emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1, + operands[1], operands[2])); + DONE; +}) + ;; These instructions map to the __builtins for the Dot Product operations. 
(define_insn "neon_<sup>dot<vsi2qi>" diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index c2076c9..8bb0060 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -510,6 +510,12 @@ UNSPEC_VCMLA90 UNSPEC_VCMLA180 UNSPEC_VCMLA270 + UNSPEC_VCMUL + UNSPEC_VCMUL90 + UNSPEC_VCMUL180 + UNSPEC_VCMUL270 + UNSPEC_VCMLS + UNSPEC_VCMLS180 UNSPEC_MATMUL_S UNSPEC_MATMUL_U UNSPEC_MATMUL_US @@ -603,8 +609,6 @@ VADDVQ_P_S VBICQ_S VBRSRQ_N_S - VCADDQ_ROT270_S - VCADDQ_ROT90_S VCMPEQQ_S VCMPEQQ_N_S VCMPNEQ_N_S @@ -648,8 +652,6 @@ VADDVQ_P_U VBICQ_U VBRSRQ_N_U - VCADDQ_ROT270_U - VCADDQ_ROT90_U VCMPEQQ_U VCMPEQQ_N_U VCMPNEQ_N_U @@ -718,8 +720,6 @@ VABDQ_F VADDQ_N_F VBICQ_F - VCADDQ_ROT270_F - VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F VCMPGEQ_F @@ -732,10 +732,6 @@ VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F - VCMULQ_F - VCMULQ_ROT180_F - VCMULQ_ROT270_F - VCMULQ_ROT90_F VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F @@ -908,7 +904,6 @@ VMLSLDAVAQ_S VQSHRUNBQ_N_S VQRSHRUNTQ_N_S - VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F VDUPQ_M_N_F @@ -930,14 +925,12 @@ VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F - VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F - VCMLAQ_ROT90_F VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F @@ -950,7 +943,6 @@ VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S VCMPEQQ_M_N_F - VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F VMLALDAVQ_P_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 8d9c89c..7843059 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -186,3 +186,73 @@ (match_operand:VDQ 2 "neon_logic_op2" "")))] "ARM_HAVE_<MODE>_ARITH" ) + +(define_expand "cadd<rot><mode>3" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF [(match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand")] + VCADD))] + "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT + && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN" +) + +;; The complex mul operations always need to expand to two instructions. 
+;; The first operation computes half of the product into a temporary and the
+;; second accumulates the remainder onto it.  Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+  [(set (match_operand:VQ_HSF 0 "register_operand")
+        (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
+                        (match_operand:VQ_HSF 2 "register_operand")]
+         VCMUL_OP))]
+  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT))
+   && !BYTES_BIG_ENDIAN"
+{
+  rtx res1 = gen_reg_rtx (<MODE>mode);
+  if (TARGET_COMPLEX)
+    {
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
+      emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
+                                                  operands[1], operands[2]));
+      emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
+                                                  operands[1], operands[2]));
+    }
+  else
+    {  /* MVE: VCMUL starts the product in res1, VCMLA adds the other half.  */
+      emit_insn (gen_mve_vcmulq<mve_rotsplit1><mode> (res1, operands[1],
+                                                      operands[2]));
+      emit_insn (gen_mve_vcmlaq<mve_rotsplit2><mode> (operands[0], res1,
+                                                      operands[1], operands[2]));
+    }
+  DONE;
+})
+
+(define_expand "arm_vcmla<rot><mode>"
+  [(set (match_operand:VF 0 "register_operand")
+        (plus:VF (match_operand:VF 1 "register_operand")
+                 (unspec:VF [(match_operand:VF 2 "register_operand")
+                             (match_operand:VF 3 "register_operand")]
+                  VCMLA)))]
+  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+                       && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cml<fcmac1><rot_op><mode>4" + [(set (match_operand:VF 0 "register_operand") + (plus:VF (match_operand:VF 1 "register_operand") + (unspec:VF [(match_operand:VF 2 "register_operand") + (match_operand:VF 3 "register_operand")] + VCMLA_OP)))] + "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT + && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1], + operands[2], operands[3])); + emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp, + operands[2], operands[3])); + DONE; +}) |