diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-01 15:29:13 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-01 16:45:05 +0000 |
commit | 8bfdf51d8595537937f990947a7a36d3a63dca5f (patch) | |
tree | 2b29abe9350fb4e45eb9d2b625d2c22102e63f99 /gcc | |
parent | 6b2034c479e5882a8566393e4fb632a23cfd6ff0 (diff) | |
download | gcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.zip gcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.tar.gz gcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.tar.bz2 |
aarch64: Reimplement vmovl_high_* intrinsics using builtins
The vmovl_high_* intrinsics map down to the SXTL2/UXTL2 instructions
that already have appropriately-named patterns and expanders,
so it's straightforward to wire them up.
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi,
vec_unpacku_hi_): Define builtins.
* config/aarch64/arm_neon.h (vmovl_high_s8): Reimplement using
builtin.
(vmovl_high_s16): Likewise.
(vmovl_high_s32): Likewise.
(vmovl_high_u8): Likewise.
(vmovl_high_u16): Likewise.
(vmovl_high_u32): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vmovl_high_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c | 32 |
3 files changed, 42 insertions, 36 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 441a456..3115b73 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -394,6 +394,10 @@ /* Implemented by aarch64_xtn2<mode>. */ BUILTIN_VQN (UNOP, xtn2, 0, NONE) + /* Implemented by vec_unpack<su>_hi_<mode>. */ + BUILTIN_VQW (UNOP, vec_unpacks_hi_, 10, NONE) + BUILTIN_VQW (UNOPU, vec_unpacku_hi_, 10, NONE) + /* Implemented by aarch64_reduc_plus_<mode>. */ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, NONE) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0911ddb..691c0c0 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8125,72 +8125,42 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s8 (int8x16_t __a) { - int16x8_t __result; - __asm__ ("sshll2 %0.8h,%1.16b,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v16qi (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s16 (int16x8_t __a) { - int32x4_t __result; - __asm__ ("sshll2 %0.4s,%1.8h,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v8hi (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s32 (int32x4_t __a) { - int64x2_t __result; - __asm__ ("sshll2 %0.2d,%1.4s,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v4si (__a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u8 (uint8x16_t __a) { - uint16x8_t __result; - __asm__ ("ushll2 %0.8h,%1.16b,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v16qi_uu (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u16 (uint16x8_t __a) { - uint32x4_t __result; - __asm__ ("ushll2 %0.4s,%1.8h,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v8hi_uu (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u32 (uint32x4_t __a) { - uint64x2_t __result; - __asm__ ("ushll2 %0.2d,%1.4s,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v4si_uu (__a); } __extension__ extern __inline int16x8_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c new file mode 100644 index 0000000..d45bb83 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include <arm_neon.h> + +#include <arm_neon.h> + +#define FUNC(IT, OT, S) \ +OT \ +foo_##S (IT a) \ +{ \ + return vmovl_high_##S (a); \ +} + +FUNC (int8x16_t, int16x8_t, s8) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.8h, v0\.16b} 1} } */ + +FUNC (int16x8_t, int32x4_t, s16) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.4s, v0\.8h} 1} } */ + +FUNC (int32x4_t, int64x2_t, s32) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.2d, v0\.4s} 1} } */ + +FUNC (uint8x16_t, uint16x8_t, u8) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.8h, v0\.16b} 1} } */ + +FUNC (uint16x8_t, uint32x4_t, u16) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.4s, v0\.8h} 1} } */ + +FUNC (uint32x4_t, uint64x2_t, u32) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.2d, v0\.4s} 1} } */ + |