diff options
author | Matthew Wahab <matthew.wahab@arm.com> | 2016-09-23 10:41:04 +0000 |
---|---|---|
committer | Matthew Wahab <mwahab@gcc.gnu.org> | 2016-09-23 10:41:04 +0000 |
commit | 29c3d574af087966662b30fe33b3727f9bb504df (patch) | |
tree | f20f9c46839cd1b0a27f013a3792f0ea41341b30 /gcc | |
parent | 0768b127e5ea675efb5377fc6bbd52e399a6f580 (diff) | |
download | gcc-29c3d574af087966662b30fe33b3727f9bb504df.zip gcc-29c3d574af087966662b30fe33b3727f9bb504df.tar.gz gcc-29c3d574af087966662b30fe33b3727f9bb504df.tar.bz2 |
[PATCH 13/17][ARM] Add VFP FP16 intrinsics.
gcc/
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* config.gcc (extra_headers): Add arm_fp16.h
* config/arm/arm_fp16.h: New.
* config/arm/arm_neon.h: Include "arm_fp16.h".
From-SVN: r240423
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/config.gcc | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm_fp16.h | 255 | ||||
-rw-r--r-- | gcc/config/arm/arm_neon.h | 1 |
4 files changed, 263 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d306bd..88556b8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2016-09-23 Matthew Wahab <matthew.wahab@arm.com> + * config.gcc (extra_headers): Add arm_fp16.h + * config/arm/arm_fp16.h: New. + * config/arm/arm_neon.h: Include "arm_fp16.h". + +2016-09-23 Matthew Wahab <matthew.wahab@arm.com> + * config/arm/arm_neon_builtins.def (vadd): New (v8hf, v4hf variants). (vmulf): New (v8hf, v4hf variants). diff --git a/gcc/config.gcc b/gcc/config.gcc index fc91ba7..5a9c50c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -320,7 +320,7 @@ arc*-*-*) arm*-*-*) cpu_type=arm extra_objs="arm-builtins.o aarch-common.o" - extra_headers="mmintrin.h arm_neon.h arm_acle.h" + extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h" target_type_format_char='%' c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" diff --git a/gcc/config/arm/arm_fp16.h b/gcc/config/arm/arm_fp16.h new file mode 100644 index 0000000..c72d8c4 --- /dev/null +++ b/gcc/config/arm/arm_fp16.h @@ -0,0 +1,255 @@ +/* ARM FP16 intrinsics include file. + + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _GCC_ARM_FP16_H +#define _GCC_ARM_FP16_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/* Intrinsics for FP16 instructions. */ +#pragma GCC push_options +#pragma GCC target ("fpu=fp-armv8") + +#if defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) + +typedef __fp16 float16_t; + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vabsh_f16 (float16_t __a) +{ + return __builtin_neon_vabshf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vaddh_f16 (float16_t __a, float16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtah_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtahssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtah_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtahusi (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_s32 (int32_t __a) +{ + return __builtin_neon_vcvthshf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_u32 (uint32_t __a) +{ + return __builtin_neon_vcvthuhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_s32 (int32_t __a, const int __b) +{ + return __builtin_neon_vcvths_nhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_n_f16_u32 (uint32_t __a, const int __b) +{ + return __builtin_neon_vcvthu_nhf ((int32_t)__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_n_s32_f16 (float16_t __a, const int __b) +{ + return 
__builtin_neon_vcvths_nsi (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_n_u32_f16 (float16_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vcvthu_nsi (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvthssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvthusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtmh_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtmhssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtmh_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtmhusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtnh_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtnhssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtnh_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtnhusi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtph_s32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtphssi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtph_u32_f16 (float16_t __a) +{ + return __builtin_neon_vcvtphusi (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vdivh_f16 (float16_t __a, float16_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmah_f16 (float16_t __a, float16_t __b, float16_t __c) +{ + return __builtin_neon_vfmahf (__a, __b, __c); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c) +{ + return __builtin_neon_vfmshf (__a, __b, 
__c); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_neon_vmaxnmhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmh_f16 (float16_t __a, float16_t __b) +{ + return __builtin_neon_vminnmhf (__a, __b); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmulh_f16 (float16_t __a, float16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vnegh_f16 (float16_t __a) +{ + return - __a; +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndah_f16 (float16_t __a) +{ + return __builtin_neon_vrndahf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndh_f16 (float16_t __a) +{ + return __builtin_neon_vrndhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndih_f16 (float16_t __a) +{ + return __builtin_neon_vrndihf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndmh_f16 (float16_t __a) +{ + return __builtin_neon_vrndmhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndnh_f16 (float16_t __a) +{ + return __builtin_neon_vrndnhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndph_f16 (float16_t __a) +{ + return __builtin_neon_vrndphf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndxh_f16 (float16_t __a) +{ + return __builtin_neon_vrndxhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsqrth_f16 (float16_t __a) +{ + return __builtin_neon_vsqrthf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsubh_f16 (float16_t __a, float16_t __b) +{ + return __a - __b; +} + 
+#endif /* __ARM_FEATURE_FP16_SCALAR_ARITHMETIC */ +#pragma GCC pop_options + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 3bd9517..8ed5aa8 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include <arm_fp16.h> #include <stdint.h> typedef __simd64_int8_t int8x8_t; |