diff options
author | Jiong Wang <jiong.wang@arm.com> | 2016-07-25 15:00:14 +0000 |
---|---|---|
committer | Jiong Wang <jiwang@gcc.gnu.org> | 2016-07-25 15:00:14 +0000 |
commit | 703bbcdfe9f2a442ecc58366d3fcd0672a14c367 (patch) | |
tree | 9651fc5e3c1574040eff548764708326a1de20fc /gcc | |
parent | ab2e8f01f1bc926ba403ea16f1663c95aa1a3c66 (diff) | |
download | gcc-703bbcdfe9f2a442ecc58366d3fcd0672a14c367.zip gcc-703bbcdfe9f2a442ecc58366d3fcd0672a14c367.tar.gz gcc-703bbcdfe9f2a442ecc58366d3fcd0672a14c367.tar.bz2 |
[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
reduc_smin_scal_): Use VDQIF_F16.
(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
Use VHSDF.
(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
* config/aarch64/iterators.md (VDQIF_F16): New.
(vp): Support HF modes.
* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.
From-SVN: r238721
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 12 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 50 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 7 |
5 files changed, 78 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3481f75..e9dd4f3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,18 @@ 2016-07-25 Jiong Wang <jiong.wang@arm.com> + * config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_, + reduc_smin_scal_): Use VDQIF_F16. + (reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF. + * config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>): + Use VHSDF. + (aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise. + * config/aarch64/iterators.md (VDQIF_F16): New. + (vp): Support HF modes. + * config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16, + vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New. + +2016-07-25 Jiong Wang <jiong.wang@arm.com> + * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to "*aarch64_mulx_elt_from_dup<mode>". (*aarch64_mul3_elt<mode>): Update schedule type. diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b888fd6..363e131 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -234,12 +234,12 @@ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */ - BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) - BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10) /* Implemented by <maxmin><mode>3. smax variants map to fmaxnm, diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7d2e97f..501858d 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2130,8 +2130,8 @@ ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). (define_expand "reduc_<maxmin_uns>_scal_<mode>" [(match_operand:<VEL> 0 "register_operand") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] - FMAXMINV)] + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] + FMAXMINV)] "TARGET_SIMD" { rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); @@ -2178,12 +2178,12 @@ ) (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] - FMAXMINV))] + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] + FMAXMINV))] "TARGET_SIMD" "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] + [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] ) ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 8b31e31..4382efd 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b) return vmulxq_f16 (__a, vdupq_n_f16 (__b)); } +/* ARMv8.2-A FP16 reduction vector intrinsics. */ + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v8hf (__a); +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 8d4dc6c..011b937 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -159,6 +159,8 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI + V4HF V8HF V2SF V4SF V2DF]) ;; Vector modes for S type. (define_mode_iterator VDQ_SI [V2SI V4SI]) @@ -760,8 +762,9 @@ (define_mode_attr vp [(V8QI "v") (V16QI "v") (V4HI "v") (V8HI "v") (V2SI "p") (V4SI "v") - (V2DI "p") (V2DF "p") - (V2SF "p") (V4SF "v")]) + (V2DI "p") (V2DF "p") + (V2SF "p") (V4SF "v") + (V4HF "v") (V8HF "v")]) (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) |