diff options
author | Jiong Wang <jiong.wang@arm.com> | 2016-07-25 14:30:52 +0000 |
---|---|---|
committer | Jiong Wang <jiwang@gcc.gnu.org> | 2016-07-25 14:30:52 +0000 |
commit | 33d72b63863de2dedfa7a697d7d0b0e8b80e1416 (patch) | |
tree | 2963f6ac69831eda6927f20fc8fb57d64f3aff8f /gcc/config/aarch64/arm_neon.h | |
parent | daef0a8c7e99cbc574291227f2ed98220a5be4d4 (diff) | |
download | gcc-33d72b63863de2dedfa7a697d7d0b0e8b80e1416.zip gcc-33d72b63863de2dedfa7a697d7d0b0e8b80e1416.tar.gz gcc-33d72b63863de2dedfa7a697d7d0b0e8b80e1416.tar.bz2 |
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
* config/aarch64/aarch64-simd.md
(aarch64_rsqrts<mode>): Extend to HF modes.
(fabd<mode>3): Likewise.
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
(aarch64_<maxmin_uns>p<mode>): Likewise.
(<su><maxmin><mode>3): Likewise.
(<maxmin_uns><mode>3): Likewise.
(<fmaxmin><mode>3): Likewise.
(aarch64_faddp<mode>): Likewise.
(aarch64_fmulx<mode>): Likewise.
(aarch64_frecps<mode>): Likewise.
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
(add<mode>3): Extend to HF modes.
(sub<mode>3): Likewise.
(mul<mode>3): Likewise.
(div<mode>3): Likewise.
(*div<mode>3): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
HF, V4HF and V8HF.
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
* config/aarch64/arm_neon.h (vadd_f16): New.
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
vsubq_f16): Likewise.
From-SVN: r238717
Diffstat (limited to 'gcc/config/aarch64/arm_neon.h')
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b4310f2..baae276 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26385,6 +26385,368 @@ vsqrtq_f16 (float16x8_t a) return __builtin_aarch64_sqrtv8hf (a); } +/* ARMv8.2-A FP16 two operands vector intrinsics. */ + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vadd_f16 (float16x4_t __a, float16x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vaddq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vabd_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_fabdv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vabdq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_fabdv8hf (a, b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcage_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_facgev4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcageq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_facgev8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcagt_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_facgtv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcagtq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_facgtv8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcale_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_faclev4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcaleq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_faclev8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcalt_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_facltv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcaltq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_facltv8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmeqv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_cmeqv8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmgev4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_cmgev8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmgtv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_cmgtv8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmlev4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_cmlev8hf_uss (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_cmltv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_cmltv8hf_uss (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_n_f16_s16 (int16x4_t __a, const int __b) +{ + return __builtin_aarch64_scvtfv4hi (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_f16_s16 (int16x8_t __a, const int __b) +{ + return __builtin_aarch64_scvtfv8hi (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_n_f16_u16 (uint16x4_t __a, const int __b) +{ + return __builtin_aarch64_ucvtfv4hi_sus (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) +{ + return __builtin_aarch64_ucvtfv8hi_sus (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcvt_n_s16_f16 (float16x4_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzsv4hf (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_s16_f16 (float16x8_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzsv8hf (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcvt_n_u16_f16 (float16x4_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcvtq_n_u16_f16 (float16x8_t __a, const int __b) +{ + return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdiv_f16 (float16x4_t __a, float16x4_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdivq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmax_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_smax_nanv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmaxq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_smax_nanv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmaxnm_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fmaxv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fmaxv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmin_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_smin_nanv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vminq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_smin_nanv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vminnm_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fminv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vminnmq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fminv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmul_f16 (float16x4_t __a, float16x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmulx_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_fmulxv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmulxq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_fmulxv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpadd_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_faddpv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vpaddq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_faddpv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpmax_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_smax_nanpv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vpmaxq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_smax_nanpv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpmaxnm_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_smaxpv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vpmaxnmq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_smaxpv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpmin_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_smin_nanpv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vpminq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_smin_nanpv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vpminnm_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_sminpv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vpminnmq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_sminpv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrecps_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_aarch64_frecpsv4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrecpsq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_aarch64_frecpsv8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrsqrts_f16 (float16x4_t a, float16x4_t b) +{ + return __builtin_aarch64_rsqrtsv4hf (a, b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrsqrtsq_f16 (float16x8_t a, float16x8_t b) +{ + return __builtin_aarch64_rsqrtsv8hf (a, b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vsub_f16 (float16x4_t __a, float16x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vsubq_f16 (float16x8_t __a, float16x8_t __b) +{ + return __a - __b; +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any |