aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-01-07 14:49:08 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-01-08 10:29:25 +0000
commitcab822d4ea7694e7c77fd713ebcfde66ba6e2c25 (patch)
tree98b711f8a0199729b87f8837ff2d3ff7125bcbbf
parentc9d25aa7489fd478098e0ef098438e797d597d3b (diff)
downloadgcc-cab822d4ea7694e7c77fd713ebcfde66ba6e2c25.zip
gcc-cab822d4ea7694e7c77fd713ebcfde66ba6e2c25.tar.gz
gcc-cab822d4ea7694e7c77fd713ebcfde66ba6e2c25.tar.bz2
aarch64: Reimplement vaba* intrinsics using builtins
This patch reimplements the vaba* arm_neon.h intrinsics using RTL builtins that expand to proper RTL patterns rather than using inline asm. The implementation is fairly straightforward by defining new builtins and using them in the header. gcc/ * config/aarch64/aarch64-simd-builtins.def (saba, uaba): Define builtins. * config/aarch64/arm_neon.h (vaba_s8): Implement using builtin. (vaba_s16): Likewise. (vaba_s32): Likewise. (vaba_u8): Likewise. (vaba_u16): Likewise. (vaba_u32): Likewise. (vabaq_s8): Likewise. (vabaq_s16): Likewise. (vabaq_s32): Likewise. (vabaq_u8): Likewise. (vabaq_u16): Likewise. (vabaq_u32): Likewise.
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def5
-rw-r--r--gcc/config/aarch64/arm_neon.h84
2 files changed, 17 insertions, 72 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ea4ce73..3cc8e09 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -148,6 +148,11 @@
BUILTIN_VDQ_BHSI (BINOP, uhsub, 0, NONE)
BUILTIN_VDQ_BHSI (BINOP, srhadd, 0, NONE)
BUILTIN_VDQ_BHSI (BINOP, urhadd, 0, NONE)
+
+ /* Implemented by aarch64_<su>aba<mode>. */
+ BUILTIN_VDQ_BHSI (TERNOP, saba, 0, NONE)
+ BUILTIN_VDQ_BHSI (TERNOPU, uaba, 0, NONE)
+
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
BUILTIN_VQN (BINOP, addhn, 0, NONE)
BUILTIN_VQN (BINOP, subhn, 0, NONE)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 82da8e2..3819ed3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6621,72 +6621,42 @@ __extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
{
- int8x8_t __result;
- __asm__ ("saba %0.8b,%2.8b,%3.8b"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav8qi (__a, __b, __c);
}
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
- int16x4_t __result;
- __asm__ ("saba %0.4h,%2.4h,%3.4h"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav4hi (__a, __b, __c);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
- int32x2_t __result;
- __asm__ ("saba %0.2s,%2.2s,%3.2s"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav2si (__a, __b, __c);
}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
{
- uint8x8_t __result;
- __asm__ ("uaba %0.8b,%2.8b,%3.8b"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav8qi_uuuu (__a, __b, __c);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
{
- uint16x4_t __result;
- __asm__ ("uaba %0.4h,%2.4h,%3.4h"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav4hi_uuuu (__a, __b, __c);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
{
- uint32x2_t __result;
- __asm__ ("uaba %0.2s,%2.2s,%3.2s"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav2si_uuuu (__a, __b, __c);
}
__extension__ extern __inline int16x8_t
@@ -6837,72 +6807,42 @@ __extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
{
- int8x16_t __result;
- __asm__ ("saba %0.16b,%2.16b,%3.16b"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav16qi (__a, __b, __c);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
- int16x8_t __result;
- __asm__ ("saba %0.8h,%2.8h,%3.8h"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav8hi (__a, __b, __c);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
- int32x4_t __result;
- __asm__ ("saba %0.4s,%2.4s,%3.4s"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_sabav4si (__a, __b, __c);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
{
- uint8x16_t __result;
- __asm__ ("uaba %0.16b,%2.16b,%3.16b"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav16qi_uuuu (__a, __b, __c);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
- uint16x8_t __result;
- __asm__ ("uaba %0.8h,%2.8h,%3.8h"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav8hi_uuuu (__a, __b, __c);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
- uint32x4_t __result;
- __asm__ ("uaba %0.4s,%2.4s,%3.4s"
- : "=w"(__result)
- : "0"(__a), "w"(__b), "w"(__c)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_uabav4si_uuuu (__a, __b, __c);
}
__extension__ extern __inline int8x8_t