diff options
author | Matthew Wahab <matthew.wahab@arm.com> | 2017-05-16 09:23:42 +0000 |
---|---|---|
committer | Tamar Christina <tnfchris@gcc.gnu.org> | 2017-05-16 09:23:42 +0000 |
commit | 6da37857031422ddcc84635905257e491210e665 (patch) | |
tree | 9ee517f11a3662d067306ca25b52c7f0ec3114b6 /gcc/config/arm | |
parent | d8c9bc362740560b8d8fbfd4d464d2cbde7591f2 (diff) | |
download | gcc-6da37857031422ddcc84635905257e491210e665.zip gcc-6da37857031422ddcc84635905257e491210e665.tar.gz gcc-6da37857031422ddcc84635905257e491210e665.tar.bz2 |
Committed on behalf of Matthew Wahab
gcc/
2017-05-16 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/arm_neon.h (vadd_f16): Use standard arithmetic
operations in fast-math mode.
(vaddq_f16): Likewise.
(vmul_f16): Likewise.
(vmulq_f16): Likewise.
(vsub_f16): Likewise.
(vsubq_f16): Likewise.
* config/arm/neon.md (add<mode>3): New.
(sub<mode>3): New.
(fma<VH:mode>4): New. Also remove outdated comment.
(mul<mode>3): New.
testsuite/
2017-05-16 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/arm/armv8_2-fp16-arith-1.c: Expand comment. Update
expected output of vadd, vsub and vmul instructions.
* gcc.target/arm/armv8_2-fp16-arith-2.c: New.
* gcc.target/arm/armv8_2-fp16-neon-2.c: New.
* gcc.target/arm/armv8_2-fp16-neon-3.c: New.
From-SVN: r248090
Diffstat (limited to 'gcc/config/arm')
-rw-r--r-- | gcc/config/arm/arm_neon.h | 24 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 52 |
2 files changed, 74 insertions, 2 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index f81d77e..65f36e2 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vadd_f16 (float16x4_t __a, float16x4_t __b) { +#ifdef __FAST_MATH__ + return __a + __b; +#else return __builtin_neon_vaddv4hf (__a, __b); +#endif } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddq_f16 (float16x8_t __a, float16x8_t __b) { +#ifdef __FAST_MATH__ + return __a + __b; +#else return __builtin_neon_vaddv8hf (__a, __b); +#endif } __extension__ extern __inline uint16x4_t @@ -17587,7 +17595,11 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_f16 (float16x4_t __a, float16x4_t __b) { +#ifdef __FAST_MATH__ + return __a * __b; +#else return __builtin_neon_vmulfv4hf (__a, __b); +#endif } __extension__ extern __inline float16x4_t @@ -17608,7 +17620,11 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_f16 (float16x8_t __a, float16x8_t __b) { +#ifdef __FAST_MATH__ + return __a * __b; +#else return __builtin_neon_vmulfv8hf (__a, __b); +#endif } __extension__ extern __inline float16x8_t @@ -17804,14 +17820,22 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsub_f16 (float16x4_t __a, float16x4_t __b) { +#ifdef __FAST_MATH__ + return __a - __b; +#else return __builtin_neon_vsubv4hf (__a, __b); +#endif } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubq_f16 (float16x8_t __a, float16x8_t __b) { +#ifdef __FAST_MATH__ + return __a - __b; +#else return __builtin_neon_vsubv8hf (__a, __b); +#endif } #endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. 
*/ diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 50d89eb..0ce3fe4 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -505,6 +505,23 @@ (const_string "neon_add<q>")))] ) +;; As with SFmode, full support for HFmode vector arithmetic is only available +;; when flag-unsafe-math-optimizations is enabled. + +(define_insn "add<mode>3" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (plus:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set (attr "type") + (if_then_else (match_test "<Is_float_mode>") + (const_string "neon_fp_addsub_s<q>") + (const_string "neon_add<q>")))] +) + (define_insn "add<mode>3_fp16" [(set (match_operand:VH 0 "s_register_operand" "=w") @@ -557,6 +574,17 @@ (const_string "neon_sub<q>")))] ) +(define_insn "sub<mode>3" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (minus:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "neon_sub<q>")] +) + (define_insn "sub<mode>3_fp16" [(set (match_operand:VH 0 "s_register_operand" "=w") @@ -664,8 +692,17 @@ [(set_attr "type" "neon_fp_mla_s<q>")] ) -;; There is limited support for unsafe-math optimizations using the NEON FP16 -;; arithmetic instructions, so only the intrinsic is currently supported. 
+(define_insn "fma<VH:mode>4" + [(set (match_operand:VH 0 "register_operand" "=w") + (fma:VH + (match_operand:VH 1 "register_operand" "w") + (match_operand:VH 2 "register_operand" "w") + (match_operand:VH 3 "register_operand" "0")))] + "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "neon_fp_mla_s<q>")] +) + (define_insn "fma<VH:mode>4_intrinsic" [(set (match_operand:VH 0 "register_operand" "=w") (fma:VH @@ -2175,6 +2212,17 @@ (const_string "neon_mul_<V_elem_ch><q>")))] ) +(define_insn "mul<mode>3" + [(set + (match_operand:VH 0 "s_register_operand" "=w") + (mult:VH + (match_operand:VH 1 "s_register_operand" "w") + (match_operand:VH 2 "s_register_operand" "w")))] + "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] +) + (define_insn "neon_vmulf<mode>" [(set (match_operand:VH 0 "s_register_operand" "=w") |