aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-02-01 15:29:13 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-02-01 16:45:05 +0000
commit8bfdf51d8595537937f990947a7a36d3a63dca5f (patch)
tree2b29abe9350fb4e45eb9d2b625d2c22102e63f99 /gcc
parent6b2034c479e5882a8566393e4fb632a23cfd6ff0 (diff)
downloadgcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.zip
gcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.tar.gz
gcc-8bfdf51d8595537937f990947a7a36d3a63dca5f.tar.bz2
aarch64: Reimplement vmovl_high_* intrinsics using builtins
The vmovl_high_* intrinsics map down to the SXTL2/UXTL2 instructions that already have appropriately-named patterns and expanders, so it's straightforward to wire them up. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi, vec_unpacku_hi_): Define builtins. * config/aarch64/arm_neon.h (vmovl_high_s8): Reimplement using builtin. (vmovl_high_s16): Likewise. (vmovl_high_s32): Likewise. (vmovl_high_u8): Likewise. (vmovl_high_u16): Likewise. (vmovl_high_u32): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vmovl_high_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def4
-rw-r--r--gcc/config/aarch64/arm_neon.h42
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c32
3 files changed, 42 insertions, 36 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 441a456..3115b73 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -394,6 +394,10 @@
/* Implemented by aarch64_xtn2<mode>. */
BUILTIN_VQN (UNOP, xtn2, 0, NONE)
+ /* Implemented by vec_unpack<su>_hi_<mode>. */
+ BUILTIN_VQW (UNOP, vec_unpacks_hi_, 10, NONE)
+ BUILTIN_VQW (UNOPU, vec_unpacku_hi_, 10, NONE)
+
/* Implemented by aarch64_reduc_plus_<mode>. */
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, NONE)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0911ddb..691c0c0 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8125,72 +8125,42 @@ __extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s8 (int8x16_t __a)
{
- int16x8_t __result;
- __asm__ ("sshll2 %0.8h,%1.16b,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacks_hi_v16qi (__a);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s16 (int16x8_t __a)
{
- int32x4_t __result;
- __asm__ ("sshll2 %0.4s,%1.8h,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacks_hi_v8hi (__a);
}
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_s32 (int32x4_t __a)
{
- int64x2_t __result;
- __asm__ ("sshll2 %0.2d,%1.4s,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacks_hi_v4si (__a);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u8 (uint8x16_t __a)
{
- uint16x8_t __result;
- __asm__ ("ushll2 %0.8h,%1.16b,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacku_hi_v16qi_uu (__a);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u16 (uint16x8_t __a)
{
- uint32x4_t __result;
- __asm__ ("ushll2 %0.4s,%1.8h,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacku_hi_v8hi_uu (__a);
}
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmovl_high_u32 (uint32x4_t __a)
{
- uint64x2_t __result;
- __asm__ ("ushll2 %0.2d,%1.4s,#0"
- : "=w"(__result)
- : "w"(__a)
- : /* No clobbers */);
- return __result;
+ return __builtin_aarch64_vec_unpacku_hi_v4si_uu (__a);
}
__extension__ extern __inline int16x8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c
new file mode 100644
index 0000000..d45bb83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#include <arm_neon.h>
+
+#define FUNC(IT, OT, S) \
+OT \
+foo_##S (IT a) \
+{ \
+ return vmovl_high_##S (a); \
+}
+
+FUNC (int8x16_t, int16x8_t, s8)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.8h, v0\.16b} 1} } */
+
+FUNC (int16x8_t, int32x4_t, s16)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.4s, v0\.8h} 1} } */
+
+FUNC (int32x4_t, int64x2_t, s32)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.2d, v0\.4s} 1} } */
+
+FUNC (uint8x16_t, uint16x8_t, u8)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.8h, v0\.16b} 1} } */
+
+FUNC (uint16x8_t, uint32x4_t, u16)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.4s, v0\.8h} 1} } */
+
+FUNC (uint32x4_t, uint64x2_t, u32)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.2d, v0\.4s} 1} } */
+