aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2020-10-02 11:53:05 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2020-10-02 11:53:05 +0100
commitbb78e5876aa6a6b4a8158cdc0f6c8511eb2be75f (patch)
tree516b84f00dd233c993a7bf0416d7b402ab2f651f /gcc
parent01c288035aa960631dd0ffd9131ed0a824a95f30 (diff)
downloadgcc-bb78e5876aa6a6b4a8158cdc0f6c8511eb2be75f.zip
gcc-bb78e5876aa6a6b4a8158cdc0f6c8511eb2be75f.tar.gz
gcc-bb78e5876aa6a6b4a8158cdc0f6c8511eb2be75f.tar.bz2
arm: Make more use of the new mode macros
As Christophe pointed out, my r11-3522 patch didn't in fact fix all of the armv8_2-fp16-arith-2.c failures introduced by allowing FP16 vectorisation without -funsafe-math-optimizations. I must have only tested the final patch on my usual arm-linux-gnueabihf bootstrap, which it turns out treats the test as unsupported. The focus of the original patch was to use mode macros for patterns that are shared between Advanced SIMD, iwMMXt and MVE. This patch uses the mode macros for general neon.md patterns too. gcc/ * config/arm/neon.md (*sub<VDQ:mode>3_neon): Use the new mode macros for the insn condition. (sub<VH:mode>3, *mul<VDQW:mode>3_neon): Likewise. (mul<VDQW:mode>3add<VDQW:mode>_neon): Likewise. (mul<VH:mode>3add<VH:mode>_neon): Likewise. (mul<VDQW:mode>3neg<VDQW:mode>add<VDQW:mode>_neon): Likewise. (fma<VCVTF:mode>4, fma<VH:mode>4, *fmsub<VCVTF:mode>4): Likewise. (quad_halves_<code>v4sf, reduc_plus_scal_<VD:mode>): Likewise. (reduc_plus_scal_<VQ:mode>, reduc_smin_scal_<VD:mode>): Likewise. (reduc_smin_scal_<VQ:mode>, reduc_smax_scal_<VD:mode>): Likewise. (reduc_smax_scal_<VQ:mode>, mul<VH:mode>3): Likewise. (neon_vabd<VF:mode>_2, neon_vabd<VF:mode>_3): Likewise. (fma<VH:mode>4_intrinsic): Delete. (neon_vadd<VCVTF:mode>): Use the new mode macros to decide which form of instruction to generate. (neon_vmla<VDQW:mode>, neon_vmls<VDQW:mode>): Likewise. (neon_vsub<VCVTF:mode>): Likewise. (neon_vfma<VH:mode>): Generate the main fma<mode>4 form instead of using fma<mode>4_intrinsic. gcc/testsuite/ * gcc.target/arm/armv8_2-fp16-arith-2.c (float16_t): Use _Float16 rather than __fp16. (float16x4_t, float16x8_t): Likewise. (fp16_abs): Use __builtin_fabsf16.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/neon.md64
-rw-r--r--gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c8
2 files changed, 29 insertions, 43 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 58832cb..85e424e 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -513,7 +513,7 @@
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -527,7 +527,7 @@
(minus:VH
(match_operand:VH 1 "s_register_operand" "w")
(match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_sub<q>")]
)
@@ -547,7 +547,7 @@
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
(match_operand:VDQW 2 "s_register_operand" "w")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -592,7 +592,7 @@
(plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
(match_operand:VDQW 3 "s_register_operand" "w"))
(match_operand:VDQW 1 "s_register_operand" "0")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -605,7 +605,7 @@
(plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
(match_operand:VH 3 "s_register_operand" "w"))
(match_operand:VH 1 "s_register_operand" "0")))]
- "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -615,7 +615,7 @@
(minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
(mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
(match_operand:VDQW 3 "s_register_operand" "w"))))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "type")
(if_then_else (match_test "<Is_float_mode>")
@@ -633,7 +633,7 @@
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
- "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
"vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -654,18 +654,7 @@
(match_operand:VH 1 "register_operand" "w")
(match_operand:VH 2 "register_operand" "w")
(match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_mla_s<q>")]
-)
-
-(define_insn "fma<VH:mode>4_intrinsic"
- [(set (match_operand:VH 0 "register_operand" "=w")
- (fma:VH
- (match_operand:VH 1 "register_operand" "w")
- (match_operand:VH 2 "register_operand" "w")
- (match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -675,7 +664,7 @@
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
- "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
"vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -1195,7 +1184,7 @@
(parallel [(const_int 0) (const_int 1)]))
(vec_select:V2SF (match_dup 1)
(parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_V4SF_ARITH"
"<VQH_mnem>.f32\t%P0, %e1, %f1"
[(set_attr "vqh_mnem" "<VQH_mnem>")
(set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
@@ -1262,7 +1251,7 @@
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
neon_pairwise_reduce (vec, operands[1], <MODE>mode,
@@ -1275,8 +1264,7 @@
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1311,7 +1299,7 @@
(define_expand "reduc_smin_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
@@ -1325,8 +1313,7 @@
(define_expand "reduc_smin_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1339,7 +1326,7 @@
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VD 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
{
rtx vec = gen_reg_rtx (<MODE>mode);
neon_pairwise_reduce (vec, operands[1], <MODE>mode,
@@ -1352,8 +1339,7 @@
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<V_elem> 0 "nonimmediate_operand")
(match_operand:VQ 1 "s_register_operand")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
+ "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
@@ -1627,7 +1613,7 @@
(match_operand:VCVTF 2 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
else
emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
@@ -1752,7 +1738,7 @@
(mult:VH
(match_operand:VH 1 "s_register_operand" "w")
(match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
@@ -1775,7 +1761,7 @@
(match_operand:VDQW 3 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
operands[2], operands[3]));
else
@@ -1803,8 +1789,8 @@
(match_operand:VH 3 "s_register_operand")]
"TARGET_NEON_FP16INST"
{
- emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
- operands[1]));
+ emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
+ operands[1]));
DONE;
})
@@ -2266,7 +2252,7 @@
(match_operand:VDQW 3 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
operands[1], operands[2], operands[3]));
else
@@ -2373,7 +2359,7 @@
(match_operand:VCVTF 2 "s_register_operand")]
"TARGET_NEON"
{
- if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ if (ARM_HAVE_NEON_<MODE>_ARITH)
emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
else
emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
@@ -6462,7 +6448,7 @@ if (BYTES_BIG_ENDIAN)
[(set (match_operand:VF 0 "s_register_operand" "=w")
(abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
(match_operand:VF 2 "s_register_operand" "w"))))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_abd_s<q>")]
)
@@ -6472,7 +6458,7 @@ if (BYTES_BIG_ENDIAN)
(abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
(match_operand:VF 2 "s_register_operand" "w")]
UNSPEC_VSUB)))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
"vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_abd_s<q>")]
)
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
index 81bad22..f94109c 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
@@ -6,9 +6,9 @@
/* Test instructions generated for half-precision arithmetic without
unsafe-math-optimizations. */
-typedef __fp16 float16_t;
-typedef __simd64_float16_t float16x4_t;
-typedef __simd128_float16_t float16x8_t;
+typedef _Float16 float16_t;
+typedef _Float16 float16x4_t __attribute__ ((vector_size (8)));
+typedef _Float16 float16x8_t __attribute__ ((vector_size (16)));
typedef short int16x4_t __attribute__ ((vector_size (8)));
typedef short int int16x8_t __attribute__ ((vector_size (16)));
@@ -16,7 +16,7 @@ typedef short int int16x8_t __attribute__ ((vector_size (16)));
float16_t
fp16_abs (float16_t a)
{
- return (a < 0) ? -a : a;
+ return __builtin_fabsf16 (a);
}
#define TEST_UNOP(NAME, OPERATOR, TY) \