Committed on behalf of Matthew Wahab

gcc/
2017-05-16  Matthew Wahab  <matthew.wahab@arm.com>

        * config/arm/arm_neon.h (vadd_f16): Use standard arithmetic
        operations in fast-math mode.
        (vaddq_f16): Likewise.
        (vmul_f16): Likewise.
        (vmulq_f16): Likewise.
        (vsub_f16): Likewise.
        (vsubq_f16): Likewise.
        * config/arm/neon.md (add<mode>3): New.
        (sub<mode>3): New.
        (fma<VH:mode>4): New.  Also remove outdated comment.
        (mul<mode>3): New.

testsuite/
2017-05-16  Matthew Wahab  <matthew.wahab@arm.com>

        * gcc.target/arm/armv8_2-fp16-arith-1.c: Expand comment.  Update
        expected output of vadd, vsub and vmul instructions.
        * gcc.target/arm/armv8_2-fp16-arith-2.c: New.
        * gcc.target/arm/armv8_2-fp16-neon-2.c: New.
        * gcc.target/arm/armv8_2-fp16-neon-3.c: New.

From-SVN: r248090
author: Matthew Wahab <matthew.wahab@arm.com> 2017-05-16 09:23:42 +0000
committer: Tamar Christina <tnfchris@gcc.gnu.org> 2017-05-16 09:23:42 +0000
commit: 6da37857031422ddcc84635905257e491210e665 (patch)
tree: 9ee517f11a3662d067306ca25b52c7f0ec3114b6 /gcc/config/arm
parent: d8c9bc362740560b8d8fbfd4d464d2cbde7591f2 (diff)
download: gcc-6da37857031422ddcc84635905257e491210e665.zip
gcc-6da37857031422ddcc84635905257e491210e665.tar.gz
gcc-6da37857031422ddcc84635905257e491210e665.tar.bz2
2 files changed, 74 insertions, 2 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index f81d77e..65f36e2 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vadd_f16 (float16x4_t __a, float16x4_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a + __b;
+#else
   return __builtin_neon_vaddv4hf (__a, __b);
+#endif
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vaddq_f16 (float16x8_t __a, float16x8_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a + __b;
+#else
   return __builtin_neon_vaddv8hf (__a, __b);
+#endif
 }
 
 __extension__ extern __inline uint16x4_t
@@ -17587,7 +17595,11 @@ __extension__ extern __inline float16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vmul_f16 (float16x4_t __a, float16x4_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a * __b;
+#else
   return __builtin_neon_vmulfv4hf (__a, __b);
+#endif
 }
 
 __extension__ extern __inline float16x4_t
@@ -17608,7 +17620,11 @@ __extension__ extern __inline float16x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vmulq_f16 (float16x8_t __a, float16x8_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a * __b;
+#else
   return __builtin_neon_vmulfv8hf (__a, __b);
+#endif
 }
 
 __extension__ extern __inline float16x8_t
@@ -17804,14 +17820,22 @@ __extension__ extern __inline float16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsub_f16 (float16x4_t __a, float16x4_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a - __b;
+#else
   return __builtin_neon_vsubv4hf (__a, __b);
+#endif
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsubq_f16 (float16x8_t __a, float16x8_t __b)
 {
+#ifdef __FAST_MATH__
+  return __a - __b;
+#else
   return __builtin_neon_vsubv8hf (__a, __b);
+#endif
 }
 
 #endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC.  */
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 50d89eb..0ce3fe4 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -505,6 +505,23 @@
                     (const_string "neon_add<q>")))]
 )
 
+;; As with SFmode, full support for HFmode vector arithmetic is only available
+;; when flag_unsafe_math_optimizations is enabled.
+
+(define_insn "add<mode>3"
+  [(set
+    (match_operand:VH 0 "s_register_operand" "=w")
+    (plus:VH
+     (match_operand:VH 1 "s_register_operand" "w")
+     (match_operand:VH 2 "s_register_operand" "w")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "type")
+   (if_then_else (match_test "<Is_float_mode>")
+    (const_string "neon_fp_addsub_s<q>")
+    (const_string "neon_add<q>")))]
+)
+
 (define_insn "add<mode>3_fp16"
   [(set
     (match_operand:VH 0 "s_register_operand" "=w")
@@ -557,6 +574,17 @@
                     (const_string "neon_sub<q>")))]
 )
 
+(define_insn "sub<mode>3"
+ [(set
+   (match_operand:VH 0 "s_register_operand" "=w")
+   (minus:VH
+    (match_operand:VH 1 "s_register_operand" "w")
+    (match_operand:VH 2 "s_register_operand" "w")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_sub<q>")]
+)
+
 (define_insn "sub<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
@@ -664,8 +692,17 @@
   [(set_attr "type" "neon_fp_mla_s<q>")]
 )
 
-;; There is limited support for unsafe-math optimizations using the NEON FP16
-;; arithmetic instructions, so only the intrinsic is currently supported.
+(define_insn "fma<VH:mode>4"
+ [(set (match_operand:VH 0 "register_operand" "=w")
+   (fma:VH
+    (match_operand:VH 1 "register_operand" "w")
+    (match_operand:VH 2 "register_operand" "w")
+    (match_operand:VH 3 "register_operand" "0")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+)
+
 (define_insn "fma<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
    (fma:VH
@@ -2175,6 +2212,17 @@
                     (const_string "neon_mul_<V_elem_ch><q>")))]
 )
 
+(define_insn "mul<mode>3"
+ [(set
+   (match_operand:VH 0 "s_register_operand" "=w")
+   (mult:VH
+    (match_operand:VH 1 "s_register_operand" "w")
+    (match_operand:VH 2 "s_register_operand" "w")))]
+  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
+)
+
 (define_insn "neon_vmulf<mode>"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
author	Matthew Wahab <matthew.wahab@arm.com>	2017-05-16 09:23:42 +0000
committer	Tamar Christina <tnfchris@gcc.gnu.org>	2017-05-16 09:23:42 +0000
commit	6da37857031422ddcc84635905257e491210e665 (patch)
tree	9ee517f11a3662d067306ca25b52c7f0ec3114b6 /gcc/config/arm
parent	d8c9bc362740560b8d8fbfd4d464d2cbde7591f2 (diff)
download	gcc-6da37857031422ddcc84635905257e491210e665.zip gcc-6da37857031422ddcc84635905257e491210e665.tar.gz gcc-6da37857031422ddcc84635905257e491210e665.tar.bz2