diff options
author | Jiong Wang <jiong.wang@arm.com> | 2016-07-25 16:15:34 +0000 |
---|---|---|
committer | Jiong Wang <jiwang@gcc.gnu.org> | 2016-07-25 16:15:34 +0000 |
commit | bb6131dbd15c8aca4e87f8197377b62331ecbe4a (patch) | |
tree | 2a6b74d2d10853fe40e5f8ffd54e73e747c7139d | |
parent | 9a594ad6ef76d46da25ef7820451fff7907d50bf (diff) | |
download | gcc-bb6131dbd15c8aca4e87f8197377b62331ecbe4a.zip gcc-bb6131dbd15c8aca4e87f8197377b62331ecbe4a.tar.gz gcc-bb6131dbd15c8aca4e87f8197377b62331ecbe4a.tar.bz2 |
[AArch64][10/10] ARMv8.2-A FP16 lane scalar intrinsics
gcc/
* config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16,
vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16,
vmulxh_lane_f16, vmulxh_laneq_f16): New.
From-SVN: r238725
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 52 |
2 files changed, 58 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c1da62a..303e4f5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2016-07-25  Jiong Wang  <jiong.wang@arm.com>
 
+	* config/aarch64/arm_neon.h (vfmah_lane_f16, vfmah_laneq_f16,
+	vfmsh_lane_f16, vfmsh_laneq_f16, vmulh_lane_f16, vmulh_laneq_f16,
+	vmulxh_lane_f16, vmulxh_laneq_f16): New.
+
+2016-07-25  Jiong Wang  <jiong.wang@arm.com>
+
 	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
 	* config/aarch64/aarch64.md (fma, fnma): Support HF.
 	* config/aarch64/arm_fp16.h (vfmah_f16, vfmsh_f16): New.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index fd55558..ab3a00c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26777,6 +26777,20 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 
 /* ARMv8.2-A FP16 lane vector intrinsics.  */
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
 	       float16x4_t __c, const int __lane)
@@ -26817,6 +26831,20 @@ vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
 	       float16x4_t __c, const int __lane)
@@ -26857,6 +26885,12 @@ vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26869,6 +26903,12 @@ vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {
@@ -26893,6 +26933,12 @@ vmulq_n_f16 (float16x8_t __a, float16_t __b)
   return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26905,6 +26951,12 @@ vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {