diff options
author | Christophe Lyon <christophe.lyon@linaro.org> | 2021-06-09 16:07:43 +0000 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@linaro.org> | 2021-06-09 16:07:43 +0000 |
commit | 7969d9c83d061e57ea80795768469cffb1a859f8 (patch) | |
tree | 95c092922d62441444d7909210507f2ae410272d | |
parent | 880198da50e1beac9b7cf8ff1bff570359c5f2a0 (diff) | |
download | gcc-7969d9c83d061e57ea80795768469cffb1a859f8.zip gcc-7969d9c83d061e57ea80795768469cffb1a859f8.tar.gz gcc-7969d9c83d061e57ea80795768469cffb1a859f8.tar.bz2 |
arm: Auto-vectorization for MVE: vclz
This patch adds support for auto-vectorization of clz for MVE.
It does so by removing the unspec from mve_vclzq_<supf><mode> and using
'clz' instead. It moves the neon_vclz<mode> expander from neon.md to
vec-common.md and renames it to the standard name clz<mode>2.
2021-06-09 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S.
(VCLZQ): Remove.
* config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix,
remove <supf> iterator.
(mve_vclzq_u<mode>): New.
* config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>.
(neon_vclz<mode>): Move to ...
* config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove.
* config/arm/vec-common.md: ... here. Add support for MVE.
gcc/testsuite/
* gcc.target/arm/simd/mve-vclz.c: New test.
-rw-r--r-- | gcc/config/arm/iterators.md | 3 | ||||
-rw-r--r-- | gcc/config/arm/mve.md | 12 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 11 | ||||
-rw-r--r-- | gcc/config/arm/unspecs.md | 2 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 |
6 files changed, 46 insertions, 17 deletions
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 3042baf..5c4fe89 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1288,7 +1288,7 @@ (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_U "u") (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") @@ -1538,7 +1538,6 @@ (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 04aa612..99e46d0 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -435,16 +435,22 @@ ;; ;; [vclzq_u, vclzq_s]) ;; -(define_insn "mve_vclzq_<supf><mode>" +(define_insn "@mve_vclzq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VCLZQ)) + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vclz.i%#<V_sz_elem> %q0, %q1" [(set_attr "type" "mve_move") ]) +(define_expand "mve_vclzq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand") + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) + ] + "TARGET_HAVE_MVE" +) ;; ;; [vclsq_s]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 18571d8..0fdffaf 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3018,7 +3018,7 @@ [(set_attr "type" "neon_cls<q>")] ) -(define_insn "clz<mode>2" +(define_insn "neon_vclz<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (clz:VDQIW 
(match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" @@ -3026,15 +3026,6 @@ [(set_attr "type" "neon_cnt<q>")] ) -(define_expand "neon_vclz<mode>" - [(match_operand:VDQIW 0 "s_register_operand") - (match_operand:VDQIW 1 "s_register_operand")] - "TARGET_NEON" -{ - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); - DONE; -}) - (define_insn "popcount<mode>2" [(set (match_operand:VE 0 "s_register_operand" "=w") (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ed1bc29..ad1c6ed 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -556,8 +556,6 @@ VQABSQ_S VDUPQ_N_U VDUPQ_N_S - VCLZQ_U - VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 2779c1a..430a92c 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -625,3 +625,10 @@ operands[0], operands[1], operands[2])); DONE; }) + +(define_expand "clz<mode>2" + [(set (match_operand:VDQIW 0 "s_register_operand") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c new file mode 100644 index 0000000..7068736 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ + } \ +} + +FUNC(s, int, 32, clz) +FUNC(u, uint, 32, clz) +FUNC(s, int, 16, clz) +FUNC(u, uint, 16, clz) +FUNC(s, int, 8, clz) +FUNC(u, uint, 8, 
clz) + +/* 16 and 8-bit versions are not vectorized because they need pack/unpack + patterns since __builtin_clz uses 32-bit parameter and return value. */ +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ |