diff options
author | Christophe Lyon <christophe.lyon@linaro.org> | 2021-05-17 12:31:58 +0000 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@linaro.org> | 2021-05-17 12:31:58 +0000 |
commit | 7606865198b241b4c944f66761d6506b02ead951 (patch) | |
tree | ee96349b7fccdd8f7bf8800fa1990f4c411cb325 | |
parent | a6eacbf1055520e968d1a25f6d30d6ff4b66272d (diff) | |
download | gcc-7606865198b241b4c944f66761d6506b02ead951.zip gcc-7606865198b241b4c944f66761d6506b02ead951.tar.gz gcc-7606865198b241b4c944f66761d6506b02ead951.tar.bz2 |
arm: Auto-vectorization for MVE: add __fp16 support to VCMP
This patch adds __fp16 support to the previous patch that added vcmp
support with MVE. For this we update existing expanders to use VDQWH
iterator, and add a new expander vcond<VH_cvtto><mode>. In the
process we need to create suitable iterators, and update v_cmp_result
as needed.
2021-05-17 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/arm/iterators.md (V16): New iterator.
(VH_cvtto): New iterator.
(v_cmp_result): Added V4HF and V8HF support.
* config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>): Use VDQWH.
(vcond<mode><mode>): Likewise.
(vcond_mask_<mode><v_cmp_result>): Likewise.
(vcond<VH_cvtto><mode>): New expander.
gcc/testsuite/
* gcc.target/arm/simd/mve-compare-3.c: New test with GCC vectors.
* gcc.target/arm/simd/mve-vcmp-f16.c: New test for
auto-vectorization.
* gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust since we now
vectorize float16_t vectors.
-rw-r--r-- | gcc/config/arm/iterators.md | 6 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c | 30 |
5 files changed, 116 insertions, 14 deletions
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index a128465..3042baf 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -231,6 +231,9 @@ ;; Vector modes for 16-bit floating-point support. (define_mode_iterator VH [V8HF V4HF]) +;; Modes with 16-bit elements only. +(define_mode_iterator V16 [V4HI V4HF V8HI V8HF]) + ;; 16-bit floating-point vector modes suitable for moving (includes BFmode). (define_mode_iterator VHFBF [V8HF V4HF V4BF V8BF]) @@ -571,6 +574,8 @@ ;; (Opposite) mode to convert to/from for vector-half mode conversions. (define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI") (V8HI "V8HF") (V8HF "V8HI")]) +(define_mode_attr VH_cvtto [(V4HI "v4hf") (V4HF "v4hi") + (V8HI "v8hf") (V8HF "v8hi")]) ;; Define element mode for each vector mode. (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") @@ -720,6 +725,7 @@ (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") (V4HI "v4hi") (V8HI "v8hi") (V2SI "v2si") (V4SI "v4si") + (V4HF "v4hi") (V8HF "v8hi") (DI "di") (V2DI "v2di") (V2SF "v2si") (V4SF "v4si")]) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 448731f..265fa40 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -366,8 +366,8 @@ (define_expand "vec_cmp<mode><v_cmp_result>" [(set (match_operand:<V_cmp_result> 0 "s_register_operand") (match_operator:<V_cmp_result> 1 "comparison_operator" - [(match_operand:VDQW 2 "s_register_operand") - (match_operand:VDQW 3 "reg_or_zero_operand")]))] + [(match_operand:VDQWH 2 "s_register_operand") + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -399,13 +399,13 @@ ;; element-wise. (define_expand "vcond<mode><mode>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operator 3 "comparison_operator" - [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "reg_or_zero_operand")]) - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + [(match_operand:VDQWH 4 "s_register_operand") + (match_operand:VDQWH 5 "reg_or_zero_operand")]) + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -430,6 +430,22 @@ DONE; }) +(define_expand "vcond<VH_cvtto><mode>" + [(set (match_operand:<VH_CVTTO> 0 "s_register_operand") + (if_then_else:<VH_CVTTO> + (match_operator 3 "comparison_operator" + [(match_operand:V16 4 "s_register_operand") + (match_operand:V16 5 "reg_or_zero_operand")]) + (match_operand:<VH_CVTTO> 1 "s_register_operand") + (match_operand:<VH_CVTTO> 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + (define_expand "vcondu<mode><v_cmp_result>" [(set (match_operand:VDQW 0 "s_register_operand") (if_then_else:VDQW @@ -446,11 +462,11 @@ }) (define_expand "vcond_mask_<mode><v_cmp_result>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operand:<V_cmp_result> 3 "s_register_operand") - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c index 921d26e..52b8737 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c @@ -104,8 +104,20 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ + +/* For float16_t. */ +/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */ + +/* For float16x4_t. */ +/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+} 2 } } */ + +/* For float16x8_t. */ +/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+} 2 } } */ /* { dg-final { scan-assembler-not {vadd\.f32} } } */ /* { dg-final { scan-assembler-not {vsub\.f32} } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c new file mode 100644 index 0000000..76f81e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +/* float 16 tests. */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE __fp16 +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT16_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* eq, ne, lt, le, gt, ge. +/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c new file mode 100644 index 0000000..dbae2d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c @@ -0,0 +1,30 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +#include <stdint.h> + +#define NB 8 + +#define FUNC(OP, NAME) \ + void test_ ## NAME ##_f (__fp16 * __restrict__ dest, __fp16 *a, __fp16 *b) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = a[i] OP b[i]; \ + } \ + } + +FUNC(==, vcmpeq) +FUNC(!=, vcmpne) +FUNC(<, vcmplt) +FUNC(<=, vcmple) +FUNC(>, vcmpgt) +FUNC(>=, vcmpge) + +/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */ |