diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-05 09:17:57 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-02-05 09:17:57 +0000 |
commit | d9bb52150db4eb282bd8adf0397e848373f85a70 (patch) | |
tree | 4e1faffb73935d44f13f9e08a4c87e496a99ef31 /gcc | |
parent | b6e7a7498732b83df61443c211b8d69454ad0b22 (diff) | |
download | gcc-d9bb52150db4eb282bd8adf0397e848373f85a70.zip gcc-d9bb52150db4eb282bd8adf0397e848373f85a70.tar.gz gcc-d9bb52150db4eb282bd8adf0397e848373f85a70.tar.bz2 |
aarch64: Reimplement vget_high* intrinsics
Similar to the vget_low* intrinsics we should just use a proper vec_select rather than
going through V2DI subregs.
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (get_high): Define builtin.
* config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Define.
* config/aarch64/arm_neon.h (__GET_HIGH): Delete.
(vget_high_f16): Reimplement using new builtin.
(vget_high_f32): Likewise.
(vget_high_f64): Likewise.
(vget_high_p8): Likewise.
(vget_high_p16): Likewise.
(vget_high_p64): Likewise.
(vget_high_s8): Likewise.
(vget_high_s16): Likewise.
(vget_high_s32): Likewise.
(vget_high_s64): Likewise.
(vget_high_u8): Likewise.
(vget_high_u16): Likewise.
(vget_high_u32): Likewise.
(vget_high_u64): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 11 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 36 |
3 files changed, 28 insertions, 21 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 66420cf..b885bd5 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -57,6 +57,8 @@ /* Implemented by aarch64_get_low<mode>. */ BUILTIN_VQMOV (UNOP, get_low, 0, AUTO_FP) + /* Implemented by aarch64_get_high<mode>. */ + BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP) /* Implemented by aarch64_<sur>q<r>shl<mode>. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e730ff5..71aa77d 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -308,6 +308,17 @@ } ) +(define_expand "aarch64_get_high<mode>" + [(match_operand:<VHALF> 0 "register_operand") + (match_operand:VQMOV 1 "register_operand")] + "TARGET_SIMD" + { + rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); + emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi)); + DONE; + } +) + (define_insn_and_split "aarch64_simd_mov_from_<mode>low" [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r") (vec_select:<VHALF> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 67c7f24..baa30bd 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -6400,111 +6400,105 @@ vget_low_u64 (uint64x2_t __a) return (uint64x1_t) {__builtin_aarch64_get_lowv2di ((int64x2_t) __a)}; } -#define __GET_HIGH(__TYPE) \ - uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ - uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ - return vreinterpret_##__TYPE##_u64 (hi); - __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f16 (float16x8_t __a) { - __GET_HIGH (f16); + return __builtin_aarch64_get_highv8hf (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f32 (float32x4_t __a) { - __GET_HIGH (f32); + return __builtin_aarch64_get_highv4sf (__a); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_f64 (float64x2_t __a) { - __GET_HIGH (f64); + return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)}; } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p8 (poly8x16_t __a) { - __GET_HIGH (p8); + return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p16 (poly16x8_t __a) { - __GET_HIGH (p16); + return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_p64 (poly64x2_t __a) { - __GET_HIGH (p64); + return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s8 (int8x16_t __a) { - __GET_HIGH (s8); + return __builtin_aarch64_get_highv16qi (__a); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s16 (int16x8_t __a) { - __GET_HIGH (s16); + return __builtin_aarch64_get_highv8hi (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s32 (int32x4_t __a) { - __GET_HIGH (s32); + return __builtin_aarch64_get_highv4si (__a); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_s64 (int64x2_t __a) { - __GET_HIGH (s64); + return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)}; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u8 (uint8x16_t __a) { - __GET_HIGH (u8); + return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u16 (uint16x8_t __a) { - __GET_HIGH (u16); + return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u32 (uint32x4_t __a) { - __GET_HIGH (u32); + return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a); } -#undef __GET_HIGH - __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vget_high_u64 (uint64x2_t __a) { - return vcreate_u64 (vgetq_lane_u64 (__a, 1)); + return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)}; } + __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) |