diff options
author | Dennis Zhang <dennis.zh@live.com> | 2020-10-23 08:53:53 +0100 |
---|---|---|
committer | Dennis Zhang <dennis.zh@live.com> | 2020-10-23 08:53:53 +0100 |
commit | 98161c248c88f873bbffba23664c540f551d89d5 (patch) | |
tree | 85788a59de2faac58f11a54183b20d7717a31bb0 /gcc | |
parent | 6fade5a6044b7102758f4ca66c8715ebc12a6306 (diff) | |
download | gcc-98161c248c88f873bbffba23664c540f551d89d5.zip gcc-98161c248c88f873bbffba23664c540f551d89d5.tar.gz gcc-98161c248c88f873bbffba23664c540f551d89d5.tar.bz2 |
arm: Auto-vectorization for MVE: vsub
This patch enables MVE vsub instructions for auto-vectorization.
The sub<mode>3 in vec-common.md is modified to use new mode macros
to include MVE extension for vectorization. MVE vsub insns in mve.md are
modified to use 'minus' instead of unspec expression to support
sub<mode>3. Use VDQ instead of VALL to cover all supported modes. The
redundant sub<mode>3 insns in neon.md are then removed.
gcc/ChangeLog:
2020-10-23 Dennis Zhang <dennis.zhang@arm.com>
* config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction
using expression 'minus'.
(mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec.
* config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed.
(neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16.
* config/arm/vec-common.md (sub<mode>3): Use the new mode macros
ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL.
gcc/testsuite/ChangeLog:
* gcc.target/arm/simd/mve-vsub_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/arm/mve.md | 16 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 24 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 16 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c | 65 |
6 files changed, 98 insertions, 37 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c00f758..8bc8f3e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2020-10-23 Dennis Zhang <dennis.zhang@arm.com> + + * config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction + using expression 'minus'. + (mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec. + * config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed. + (neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16. + * config/arm/vec-common.md (sub<mode>3): Use the new mode macros + ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL. + 2020-10-22 Alan Modra <amodra@gmail.com> * config/rs6000/rs6000.c (rs6000_emit_xxspltidp_v2df): Delete diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 47c34b9..ecbaaa9 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -1957,6 +1957,17 @@ [(set_attr "type" "mve_move") ]) +(define_insn "mve_vsubq<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (minus:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vsub.i%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + ;; ;; [vabdq_f]) ;; @@ -2860,9 +2871,8 @@ (define_insn "mve_vsubq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VSUBQ_F)) + (minus:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vsub.f%#<V_sz_elem>\t%q0, %q1, %q2" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e459b9a..2d76769 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -521,28 +521,6 @@ (const_string "neon_sub<q>")))] ) -(define_insn "sub<mode>3" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 
1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "ARM_HAVE_NEON_<MODE>_ARITH" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_sub<q>")] -) - -(define_insn "sub<mode>3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_sub<q>")] -) - (define_insn "*mul<mode>3_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") @@ -1637,7 +1615,7 @@ (match_operand:VH 2 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 0f117d6..250e503 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -87,18 +87,12 @@ "ARM_HAVE_<MODE>_ARITH" ) -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for IWMMXT and Neon. 
- (define_expand "sub<mode>3" - [(set (match_operand:VALL 0 "s_register_operand") - (minus:VALL (match_operand:VALL 1 "s_register_operand") - (match_operand:VALL 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + [(set (match_operand:VDQ 0 "s_register_operand") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "mul<mode>3" [(set (match_operand:VDQWH 0 "s_register_operand") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b9bb761..d7c7c5f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-10-23 Dennis Zhang <dennis.zhang@arm.com> + + * gcc.target/arm/simd/mve-vsub_1.c: New test. + 2020-10-22 Alan Modra <amodra@gmail.com> * gcc.target/powerpc/vec-splati-runnable.c: Don't abort on diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c new file mode 100644 index 0000000..cb3ef3a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg -additional-options "-O3 -funsafe-math-optimizations" } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +void test_vsub_i32 (int32_t * dest, int32_t * a, int32_t * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +void test_vsub_i32_u (uint32_t * dest, uint32_t * a, uint32_t * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i32\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_i16 (int16_t * dest, int16_t * a, int16_t * b) { + int i; + for (i=0; i<8; i++) { + dest[i] = a[i] - b[i]; + } +} + +void 
test_vsub_i16_u (uint16_t * dest, uint16_t * a, uint16_t * b) { + int i; + for (i=0; i<8; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_i8 (int8_t * dest, int8_t * a, int8_t * b) { + int i; + for (i=0; i<16; i++) { + dest[i] = a[i] - b[i]; + } +} + +void test_vsub_i8_u (uint8_t * dest, uint8_t * a, uint8_t * b) { + int i; + for (i=0; i<16; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_f32 (float * dest, float * a, float * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.f32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + |