diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-02-09 01:14:00 +0000 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-04-28 21:11:35 +0100 |
commit | 8e7f6e03955244827a513777e4845c98e130319d (patch) | |
tree | 5c545953cf517140fae1005dca5a53662d621253 | |
parent | fa18085a32df06be6e7d899fd804d537c0149baf (diff) | |
download | gcc-8e7f6e03955244827a513777e4845c98e130319d.zip gcc-8e7f6e03955244827a513777e4845c98e130319d.tar.gz gcc-8e7f6e03955244827a513777e4845c98e130319d.tar.bz2 |
aarch64: Use RTL builtins for vpadal_[su]32 intrinsics
Rewrite vpadal_[su]32 Neon intrinsics to use RTL builtins rather than
inline assembly code, allowing for better scheduling and
optimization.
gcc/ChangeLog:
2021-02-09 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Use VDQV_L
iterator to generate [su]adalp RTL builtins.
* config/aarch64/aarch64-simd.md: Use VDQV_L iterator in
[su]adalp RTL pattern.
* config/aarch64/arm_neon.h (vpadal_s32): Use RTL builtin
instead of inline asm.
(vpadal_u32): Likewise.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 4 |
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 4 |
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 14 |
3 files changed, 6 insertions, 16 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ecf8019..202f690 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -170,8 +170,8 @@
   BUILTIN_VDQ_BHSI (TERNOP, saba, 0, NONE)
   BUILTIN_VDQ_BHSI (TERNOPU, uaba, 0, NONE)
 
-  BUILTIN_VDQV_S (BINOP, sadalp, 0, NONE)
-  BUILTIN_VDQV_S (BINOPU, uadalp, 0, NONE)
+  BUILTIN_VDQV_L (BINOP, sadalp, 0, NONE)
+  BUILTIN_VDQV_L (BINOPU, uadalp, 0, NONE)
 
   /* Implemented by aarch64_<sur>abal<mode>. */
   BUILTIN_VD_BHSI (TERNOP, sabal, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8aae6a6..565ce5a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -904,8 +904,8 @@
 
 (define_insn "aarch64_<sur>adalp<mode>"
   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
-        (unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
-                          (match_operand:<VDBLW> 1 "register_operand" "0")]
+        (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
+                         (match_operand:<VDBLW> 1 "register_operand" "0")]
         ADALP))]
   "TARGET_SIMD"
   "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7eed6c6..164c76d 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8449,12 +8449,7 @@ __extension__ extern __inline int64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadal_s32 (int64x1_t __a, int32x2_t __b)
 {
-  int64x1_t __result;
-  __asm__ ("sadalp %0.1d,%2.2s"
-           : "=w"(__result)
-           : "0"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return (int64x1_t) __builtin_aarch64_sadalpv2si (__a[0], __b);
 }
 
 __extension__ extern __inline uint16x4_t
@@ -8475,12 +8470,7 @@ __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
 {
-  uint64x1_t __result;
-  __asm__ ("uadalp %0.1d,%2.2s"
-           : "=w"(__result)
-           : "0"(__a), "w"(__b)
-           : /* No clobbers */);
-  return __result;
+  return (uint64x1_t) __builtin_aarch64_uadalpv2si_uuu (__a[0], __b);
 }
 
 __extension__ extern __inline int16x8_t