aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-02-02 14:27:34 +0000
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-02-02 15:53:19 +0000
commit d14cf89b94299d6d66c150fbbb9899a5dd91e7d4 (patch)
tree c7a96b3464ed8fd7c0910c86e63a5207d0ce2e47 /gcc
parent 8fdfd0cfdb7ca6b083a2a6205c40f512ea4631a9 (diff)
download gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.zip
gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.tar.gz
gcc-d14cf89b94299d6d66c150fbbb9899a5dd91e7d4.tar.bz2
aarch64: Reimplement vrsqrte* intrinsics with builtins
Another very simple move from inline asm to builtins.
Only two intrinsics this time.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd-builtins.def (ursqrte): Define builtin.
	* config/aarch64/aarch64-simd.md (aarch64_ursqrte<mode>): New pattern.
	* config/aarch64/arm_neon.h (vrsqrte_u32): Reimplement using builtin.
	(vrsqrteq_u32): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd-builtins.def | 3
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 8
-rw-r--r-- gcc/config/aarch64/arm_neon.h | 14
3 files changed, 13 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 2f50ecc..48e481c 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -705,6 +705,9 @@
/* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0, FP)
+ /* Implemented by aarch64_ursqrte<mode>. */
+ BUILTIN_VDQ_SI (UNOPU, ursqrte, 0, NONE)
+
/* Implemented by fabd<mode>3. */
BUILTIN_VHSDF_HSDF (BINOP, fabd, 3, FP)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index eea5c87..767d673 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -755,6 +755,14 @@
DONE;
})
+(define_insn "aarch64_ursqrte<mode>"
+[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
+ (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
+ UNSPEC_RSQRTE))]
+"TARGET_SIMD"
+"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
+
(define_insn "*aarch64_mul3_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(mult:DF
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e32f7ab..4b905d9 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9387,24 +9387,14 @@ __extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrte_u32 (uint32x2_t __a)
{
- uint32x2_t __result;
- __asm__ ("ursqrte %0.2s,%1.2s"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_ursqrtev2si_uu (__a);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrteq_u32 (uint32x4_t __a)
{
- uint32x4_t __result;
- __asm__ ("ursqrte %0.4s,%1.4s"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_ursqrtev4si_uu (__a);
}
__extension__ extern __inline int8x16_t