author     Jonathan Wright <jonathan.wright@arm.com>   2021-02-08 11:37:29 +0000
committer  Jonathan Wright <jonathan.wright@arm.com>   2021-04-28 21:10:41 +0100
commit     a53b8229e64c78256449005929e599b2eab83fbd
tree       76ed038c5329b8147816cef9b095caac5ac50ac4
parent     a9cb8b6c1ff34c65f60cf745ae9967a9dfd8f195
aarch64: Use RTL builtins for vq[r]dmulh[q]_n intrinsics
Rewrite vq[r]dmulh[q]_n Neon intrinsics to use RTL builtins rather
than inline assembly code, allowing for better scheduling and
optimization.
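
For context, each vq[r]dmulh[q]_n intrinsic performs a signed saturating doubling multiply returning the high half, with the scalar operand broadcast across every lane of the second input. A minimal scalar model of the per-lane operation on 16-bit elements (illustrative only, not part of the patch; the helper name is invented):

    #include <stdint.h>

    /* Per-lane model of SQDMULH on 16-bit elements: widen, double the
       product, saturate to 32 bits, and keep the high half.  */
    static int16_t
    sqdmulh_s16_model (int16_t a, int16_t b)
    {
      int64_t product = 2 * (int64_t) a * (int64_t) b;
      if (product > INT32_MAX)            /* only -32768 * -32768 overflows */
        product = INT32_MAX;              /* saturate */
      return (int16_t) (product >> 16);   /* high half of the doubled product */
    }

vqdmulh_n_s16 (__a, __b) yields this result in each of the four lanes, with __b duplicated as the second operand; the vqrdmulh variants additionally add a rounding constant (1 << 15 for 16-bit elements) before taking the high half.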
gcc/ChangeLog:

2021-02-08  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd-builtins.def: Add sq[r]dmulh_n
	builtin generator macros.
	* config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh_n<mode>):
	Define.
	* config/aarch64/arm_neon.h (vqdmulh_n_s16): Use RTL builtin
	instead of inline asm.
	(vqdmulh_n_s32): Likewise.
	(vqdmulhq_n_s16): Likewise.
	(vqdmulhq_n_s32): Likewise.
	(vqrdmulh_n_s16): Likewise.
	(vqrdmulh_n_s32): Likewise.
	(vqrdmulhq_n_s16): Likewise.
	(vqrdmulhq_n_s32): Likewise.
 gcc/config/aarch64/aarch64-simd-builtins.def |  3 +
 gcc/config/aarch64/aarch64-simd.md           | 12 +
 gcc/config/aarch64/arm_neon.h                | 56 +-
 3 files changed, 23 insertions(+), 48 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b885bd5..f79e716 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -348,6 +348,9 @@
   /* Implemented by aarch64_sq<r>dmulh<mode>.  */
   BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0, NONE)
   BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0, NONE)
+  /* Implemented by aarch64_sq<r>dmulh_n<mode>.  */
+  BUILTIN_VDQHS (BINOP, sqdmulh_n, 0, NONE)
+  BUILTIN_VDQHS (BINOP, sqrdmulh_n, 0, NONE)
   /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>.  */
   BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_lane, 0, NONE)
   BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_laneq, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4edee99..5245cf0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4639,6 +4639,18 @@
   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
 )
 
+(define_insn "aarch64_sq<r>dmulh_n<mode>"
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+	(unspec:VDQHS
+	  [(match_operand:VDQHS 1 "register_operand" "w")
+	   (vec_duplicate:VDQHS
+	     (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
+	 VQDMULH))]
+  "TARGET_SIMD"
+  "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
+  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
+)
+
 ;; sq<r>dmulh_lane
 
 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index baa30bd..5fb2b3d 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8769,48 +8769,28 @@
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulh_n_s16 (int16x4_t __a, int16_t __b)
 {
-  int16x4_t __result;
-  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv4hi (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulh_n_s32 (int32x2_t __a, int32_t __b)
 {
-  int32x2_t __result;
-  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv2si (__a, __b);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulhq_n_s16 (int16x8_t __a, int16_t __b)
 {
-  int16x8_t __result;
-  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulhq_n_s32 (int32x4_t __a, int32_t __b)
 {
-  int32x4_t __result;
-  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -8880,48 +8860,28 @@
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulh_n_s16 (int16x4_t __a, int16_t __b)
 {
-  int16x4_t __result;
-  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv4hi (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulh_n_s32 (int32x2_t __a, int32_t __b)
 {
-  int32x2_t __result;
-  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv2si (__a, __b);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
 {
-  int16x8_t __result;
-  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
 {
-  int32x4_t __result;
-  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
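
With the RTL builtin in place, a call like the one below should compile to a single sqdmulh against the duplicated scalar, and the compiler can now schedule and combine it like any other insn instead of treating it as opaque inline asm. (A usage sketch; the function name is invented for illustration.)

    #include <arm_neon.h>

    /* Q15 fixed-point scaling: expected to emit something like
       sqdmulh v0.4h, v0.4h, v1.h[0].  */
    int16x4_t
    scale_q15 (int16x4_t v, int16_t coeff)
    {
      return vqdmulh_n_s16 (v, coeff);
    }

Note that the <h_con> constraint in the new pattern keeps the halfword scalar in the lower SIMD registers, matching the "x" constraint the removed asm used for the 16-bit variants.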