diff options
author | Alan Lawrence <alan.lawrence@arm.com> | 2014-09-05 11:24:56 +0000 |
---|---|---|
committer | Alan Lawrence <alalaw01@gcc.gnu.org> | 2014-09-05 11:24:56 +0000 |
commit | 8684fa50fa4a51661b91ae8a323a24a5a21d08eb (patch) | |
tree | 273c092dba6877a1d1836481209c57a9e82a0046 /gcc | |
parent | 6fff10f9aa6dbb92ccc15d1b643f7148531d7458 (diff) | |
download | gcc-8684fa50fa4a51661b91ae8a323a24a5a21d08eb.zip gcc-8684fa50fa4a51661b91ae8a323a24a5a21d08eb.tar.gz gcc-8684fa50fa4a51661b91ae8a323a24a5a21d08eb.tar.bz2 |
[PATCH AArch64 2/2] Replace temporary inline assembler for vget_high
* config/aarch64/arm_neon.h (__GET_HIGH): New macro.
(vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16,
vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64,
vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64):
Remove temporary __asm__ and reimplement.
From-SVN: r214952
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 211 |
2 files changed, 87 insertions, 132 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 853832b..cb7d78b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,13 @@ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + * config/aarch64/arm_neon.h (__GET_HIGH): New macro. + (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, + vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, + vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): + Remove temporary __asm__ and reimplement. + +2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code handling cmge, cmgt, cmeq, cmtst. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b33dc5c..4598deb 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4328,6 +4328,85 @@ vget_low_u64 (uint64x2_t __a) #undef __GET_LOW +#define __GET_HIGH(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ + return vreinterpret_##__TYPE##_u64 (hi); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + __GET_HIGH (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t __a) +{ + __GET_HIGH (f64); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + __GET_HIGH (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + __GET_HIGH (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + __GET_HIGH (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + __GET_HIGH (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + __GET_HIGH (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + __GET_HIGH (s64); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t __a) +{ + __GET_HIGH (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + __GET_HIGH (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + __GET_HIGH (u32); +} + +#undef __GET_HIGH + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return vcreate_u64 (vgetq_lane_u64 (__a, 1)); +} + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5770,138 +5849,6 @@ vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_high_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_high_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_high_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_high_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_high_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_high_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_high_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_high_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_high_u8 (uint8x16_t a) -{ - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_high_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_high_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_high_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vhsub_s8 (int8x8_t a, int8x8_t b) { |