aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/arm
diff options
context:
space:
mode:
authorMatthew Wahab <matthew.wahab@arm.com>2017-05-16 09:23:42 +0000
committerTamar Christina <tnfchris@gcc.gnu.org>2017-05-16 09:23:42 +0000
commit6da37857031422ddcc84635905257e491210e665 (patch)
tree9ee517f11a3662d067306ca25b52c7f0ec3114b6 /gcc/config/arm
parentd8c9bc362740560b8d8fbfd4d464d2cbde7591f2 (diff)
downloadgcc-6da37857031422ddcc84635905257e491210e665.zip
gcc-6da37857031422ddcc84635905257e491210e665.tar.gz
gcc-6da37857031422ddcc84635905257e491210e665.tar.bz2
Committed on behalf of Matthew Wahab
gcc/ 2017-05-16 Matthew Wahab <matthew.wahab@arm.com> * config/arm/arm_neon.h (vadd_f16): Use standard arithmetic operations in fast-math mode. (vaddq_f16): Likewise. (vmul_f16): Likewise. (vmulq_f16): Likewise. (vsub_f16): Likewise. (vsubq_f16): Likewise. * config/arm/neon.md (add<mode>3): New. (sub<mode>3): New. (fma<VH:mode>4): New. Also remove outdated comment. (mul<mode>3): New. testsuite/ 2017-05-16 Matthew Wahab <matthew.wahab@arm.com> * gcc.target/arm/armv8_2-fp16-arith-1.c: Expand comment. Update expected output of vadd, vsub and vmul instructions. * gcc.target/arm/armv8_2-fp16-arith-2.c: New. * gcc.target/arm/armv8_2-fp16-neon-2.c: New. * gcc.target/arm/armv8_2-fp16-neon-3.c: New. From-SVN: r248090
Diffstat (limited to 'gcc/config/arm')
-rw-r--r--gcc/config/arm/arm_neon.h24
-rw-r--r--gcc/config/arm/neon.md52
2 files changed, 74 insertions, 2 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index f81d77e..65f36e2 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_f16 (float16x4_t __a, float16x4_t __b)
{
+#ifdef __FAST_MATH__
+ return __a + __b;
+#else
return __builtin_neon_vaddv4hf (__a, __b);
+#endif
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddq_f16 (float16x8_t __a, float16x8_t __b)
{
+#ifdef __FAST_MATH__
+ return __a + __b;
+#else
return __builtin_neon_vaddv8hf (__a, __b);
+#endif
}
__extension__ extern __inline uint16x4_t
@@ -17587,7 +17595,11 @@ __extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmul_f16 (float16x4_t __a, float16x4_t __b)
{
+#ifdef __FAST_MATH__
+ return __a * __b;
+#else
return __builtin_neon_vmulfv4hf (__a, __b);
+#endif
}
__extension__ extern __inline float16x4_t
@@ -17608,7 +17620,11 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulq_f16 (float16x8_t __a, float16x8_t __b)
{
+#ifdef __FAST_MATH__
+ return __a * __b;
+#else
return __builtin_neon_vmulfv8hf (__a, __b);
+#endif
}
__extension__ extern __inline float16x8_t
@@ -17804,14 +17820,22 @@ __extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsub_f16 (float16x4_t __a, float16x4_t __b)
{
+#ifdef __FAST_MATH__
+ return __a - __b;
+#else
return __builtin_neon_vsubv4hf (__a, __b);
+#endif
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubq_f16 (float16x8_t __a, float16x8_t __b)
{
+#ifdef __FAST_MATH__
+ return __a - __b;
+#else
return __builtin_neon_vsubv8hf (__a, __b);
+#endif
}
#endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 50d89eb..0ce3fe4 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -505,6 +505,23 @@
(const_string "neon_add<q>")))]
)
+;; As with SFmode, full support for HFmode vector arithmetic is only available
+;; when flag-unsafe-math-optimizations is enabled.
+
+(define_insn "add<mode>3"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+ (plus:VH
+ (match_operand:VH 1 "s_register_operand" "w")
+ (match_operand:VH 2 "s_register_operand" "w")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "type")
+ (if_then_else (match_test "<Is_float_mode>")
+ (const_string "neon_fp_addsub_s<q>")
+ (const_string "neon_add<q>")))]
+)
+
(define_insn "add<mode>3_fp16"
[(set
(match_operand:VH 0 "s_register_operand" "=w")
@@ -557,6 +574,17 @@
(const_string "neon_sub<q>")))]
)
+(define_insn "sub<mode>3"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+ (minus:VH
+ (match_operand:VH 1 "s_register_operand" "w")
+ (match_operand:VH 2 "s_register_operand" "w")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_sub<q>")]
+)
+
(define_insn "sub<mode>3_fp16"
[(set
(match_operand:VH 0 "s_register_operand" "=w")
@@ -664,8 +692,17 @@
[(set_attr "type" "neon_fp_mla_s<q>")]
)
-;; There is limited support for unsafe-math optimizations using the NEON FP16
-;; arithmetic instructions, so only the intrinsic is currently supported.
+(define_insn "fma<VH:mode>4"
+ [(set (match_operand:VH 0 "register_operand" "=w")
+ (fma:VH
+ (match_operand:VH 1 "register_operand" "w")
+ (match_operand:VH 2 "register_operand" "w")
+ (match_operand:VH 3 "register_operand" "0")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+)
+
(define_insn "fma<VH:mode>4_intrinsic"
[(set (match_operand:VH 0 "register_operand" "=w")
(fma:VH
@@ -2175,6 +2212,17 @@
(const_string "neon_mul_<V_elem_ch><q>")))]
)
+(define_insn "mul<mode>3"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+ (mult:VH
+ (match_operand:VH 1 "s_register_operand" "w")
+ (match_operand:VH 2 "s_register_operand" "w")))]
+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
+)
+
(define_insn "neon_vmulf<mode>"
[(set
(match_operand:VH 0 "s_register_operand" "=w")