diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-02 14:27:34 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-02 15:53:19 +0000 |
commit | d14cf89b94299d6d66c150fbbb9899a5dd91e7d4 (patch) | |
tree | c7a96b3464ed8fd7c0910c86e63a5207d0ce2e47 /gcc | |
parent | 8fdfd0cfdb7ca6b083a2a6205c40f512ea4631a9 (diff) | |
download | gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.zip gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.tar.gz gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.tar.bz2 |
aarch64: Reimplement vrsqrte* intrinsics with builtins
Another very simple move from inline asm to builtins.
Only two intrinsics this time.
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (ursqrte): Define builtin.
* config/aarch64/aarch64-simd.md (aarch64_ursqrte<mode>): New pattern.
* config/aarch64/arm_neon.h (vrsqrte_u32): Reimplement using builtin.
(vrsqrteq_u32): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 3 |
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 8 |
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 14 |
3 files changed, 13 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 2f50ecc..48e481c 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -705,6 +705,9 @@
   /* Implemented by aarch64_rsqrts<mode>. */
   BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0, FP)
 
+  /* Implemented by aarch64_ursqrte<mode>. */
+  BUILTIN_VDQ_SI (UNOPU, ursqrte, 0, NONE)
+
   /* Implemented by fabd<mode>3. */
   BUILTIN_VHSDF_HSDF (BINOP, fabd, 3, FP)
 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index eea5c87..767d673 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -755,6 +755,14 @@
   DONE;
 })
 
+(define_insn "aarch64_ursqrte<mode>"
+[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
+      (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
+                     UNSPEC_RSQRTE))]
+"TARGET_SIMD"
+"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
+
 (define_insn "*aarch64_mul3_elt_to_64v2df"
   [(set (match_operand:DF 0 "register_operand" "=w")
      (mult:DF
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e32f7ab..4b905d9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9387,24 +9387,14 @@ __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsqrte_u32 (uint32x2_t __a)
 {
-  uint32x2_t __result;
-  __asm__ ("ursqrte %0.2s,%1.2s"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_ursqrtev2si_uu (__a);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsqrteq_u32 (uint32x4_t __a)
 {
-  uint32x4_t __result;
-  __asm__ ("ursqrte %0.4s,%1.4s"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_ursqrtev4si_uu (__a);
 }
 
 __extension__ extern __inline int8x16_t