author     Robin Dapp <rdapp@ventanamicro.com>    2024-10-17 11:33:19 +0200
committer  Robin Dapp <rdapp@ventanamicro.com>    2024-12-13 10:12:40 +0100
commit     528567a7b1589735408eaa133206a0683162188e
tree       91305cc91400531b94f1b0c5e2a20d053478db56 /gcc
parent     cff3050a4fbec323629563b87c9a83bf3e7be908
RISC-V: Add even/odd vec_perm_const pattern.
This adds handling for even/odd permutation patterns like {0, 2, 4, 6, ...} and {1, 3, 5, 7, ...}, which select the even or odd elements of the two input vectors.  Such permutations are expanded with two vcompress instructions and one vslideup.
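For illustration only, here is a minimal scalar sketch of the idea behind the lowering; it is not part of the patch, and VLEN, the element type and the helper name are made up for this example.  Each operand is "compressed" by a 1 0 1 0 ... mask so that its even elements end up contiguous in the low half, and the compressed second operand is then slid up by VLEN/2 into the upper half of the result:

#include <stdio.h>

#define VLEN 8

/* Scalar model of: vcompress (op0), vcompress (op1), vslideup by VLEN/2.  */
static void
even_perm_via_compress_slideup (const int *op0, const int *op1, int *target)
{
  /* Mask 1 0 1 0 ... marks the even element positions.  */
  int mask[VLEN];
  for (int i = 0; i < VLEN; i++)
    mask[i] = (i % 2 == 0);

  /* "vcompress" of op0: pack the masked elements into the low VLEN/2
     slots of the result.  */
  int k = 0;
  for (int i = 0; i < VLEN; i++)
    if (mask[i])
      target[k++] = op0[i];

  /* "vcompress" of op1 into a temporary.  */
  int tmp[VLEN], t = 0;
  for (int i = 0; i < VLEN; i++)
    if (mask[i])
      tmp[t++] = op1[i];

  /* "vslideup" by VLEN/2: the compressed op1 elements land in the upper
     half, merging with the lower half already written to target.  */
  for (int i = 0; i < VLEN / 2; i++)
    target[VLEN / 2 + i] = tmp[i];
}

int
main (void)
{
  int op0[VLEN] = {0, 1, 2, 3, 4, 5, 6, 7};
  int op1[VLEN] = {8, 9, 10, 11, 12, 13, 14, 15};
  int res[VLEN];

  even_perm_via_compress_slideup (op0, op1, res);
  for (int i = 0; i < VLEN; i++)
    printf ("%d ", res[i]);	/* Expect: 0 2 4 6 8 10 12 14.  */
  printf ("\n");
  return 0;
}

The odd variant works the same way with a 0 1 0 1 ... mask.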
gcc/ChangeLog:
* config/riscv/riscv-v.cc (shuffle_even_odd_patterns): New
function.
(expand_vec_perm_const_1): Use new function.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c: New test.
Diffstat (limited to 'gcc')
 gcc/config/riscv/riscv-v.cc                                                |  66
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c | 122
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c     |  68
3 files changed, 256 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d58632b..517a016 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3572,6 +3572,70 @@ shuffle_interleave_patterns (struct expand_vec_perm_d *d)
   return true;
 }
+
+/* Recognize even/odd patterns like [0 2 4 6].  We use two compress
+   and one slideup. */
+
+static bool
+shuffle_even_odd_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  poly_int64 vec_len = d->perm.length ();
+  int n_patterns = d->perm.encoding ().npatterns ();
+
+  if (n_patterns != 1)
+    return false;
+
+  if (!vec_len.is_constant ())
+    return false;
+
+  int vlen = vec_len.to_constant ();
+  if (vlen < 4 || vlen > 64)
+    return false;
+
+  if (d->one_vector_p)
+    return false;
+
+  bool even = true;
+  if (!d->perm.series_p (0, 1, 0, 2))
+    {
+      even = false;
+      if (!d->perm.series_p (0, 1, 1, 2))
+        return false;
+    }
+
+  /* Success! */
+  if (d->testing_p)
+    return true;
+
+  machine_mode mask_mode = get_mask_mode (vmode);
+  rvv_builder builder (mask_mode, vlen, 1);
+  int bit = even ? 0 : 1;
+  for (int i = 0; i < vlen; i++)
+    {
+      bit ^= 1;
+      if (bit)
+        builder.quick_push (CONST1_RTX (BImode));
+      else
+        builder.quick_push (CONST0_RTX (BImode));
+    }
+  rtx mask = force_reg (mask_mode, builder.build ());
+
+  insn_code icode = code_for_pred_compress (vmode);
+  rtx ops1[] = {d->target, d->op0, mask};
+  emit_vlmax_insn (icode, COMPRESS_OP, ops1);
+
+  rtx tmp2 = gen_reg_rtx (vmode);
+  rtx ops2[] = {tmp2, d->op1, mask};
+  emit_vlmax_insn (icode, COMPRESS_OP, ops2);
+
+  rtx ops[] = {d->target, d->target, tmp2, gen_int_mode (vlen / 2, Pmode)};
+  icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+  emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
+
+  return true;
+}
+
 /* Recognize decompress patterns:
 
    1. VEC_PERM_EXPR op0 and op1
@@ -3890,6 +3954,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
        return true;
       if (shuffle_interleave_patterns (d))
        return true;
+      if (shuffle_even_odd_patterns (d))
+       return true;
       if (shuffle_compress_patterns (d))
        return true;
       if (shuffle_decompress_patterns (d))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
new file mode 100644
index 0000000..ff1ffa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
@@ -0,0 +1,122 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */
+
+#include "shuffle-evenodd.c"
+
+#define SERIES_2(x, y) (x), (x + 1)
+#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y)
+#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y)
+#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y)
+#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y)
+#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y)
+
+#define comp(a, b, n) \
+  for (unsigned i = 0; i < n; ++i) \
+    if ((a)[i] != (b)[i]) \
+      __builtin_abort ();
+
+#define CHECK1(TYPE, NUNITS) \
+  __attribute__ ((noipa)) void check1_##TYPE () \
+  { \
+    TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref = (TYPE){MASKE_##NUNITS (0, NUNITS)}; \
+    TYPE res; \
+    permute1_##TYPE (v0, v1, &res); \
+    comp (res, ref, NUNITS); \
+  }
+
+#define CHECK2(TYPE, NUNITS) \
+  __attribute__ ((noipa)) void check2_##TYPE () \
+  { \
+    TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref = (TYPE){MASKO_##NUNITS (0, NUNITS)}; \
+    TYPE res; \
+    permute2_##TYPE (v0, v1, &res); \
+    comp (res, ref, NUNITS); \
+  }
+
+#define CHECK_ALL(T) \
+  T (vnx4qi, 4) \
+  T (vnx8qi, 8) \
+  T (vnx16qi, 16) \
+  T (vnx32qi, 32) \
+  T (vnx64qi, 64) \
+  T (vnx4hi, 4) \
+  T (vnx8hi, 8) \
+  T (vnx16hi, 16) \
+  T (vnx32hi, 32) \
+  T (vnx64hi, 64) \
+  T (vnx4si, 4) \
+  T (vnx8si, 8) \
+  T (vnx16si, 16) \
+  T (vnx32si, 32) \
+  T (vnx4di, 4) \
+  T (vnx8di, 8) \
+  T (vnx16di, 16) \
+  T (vnx4sf, 4) \
+  T (vnx8sf, 8) \
+  T (vnx16sf, 16) \
+  T (vnx32sf, 32) \
+  T (vnx4df, 4) \
+  T (vnx8df, 8) \
+  T (vnx16df, 16)
+
+CHECK_ALL (CHECK1)
+CHECK_ALL (CHECK2)
+
+int
+main ()
+{
+  check1_vnx4qi ();
+  check1_vnx8qi ();
+  check1_vnx16qi ();
+  check1_vnx32qi ();
+  check1_vnx64qi ();
+  check1_vnx4hi ();
+  check1_vnx8hi ();
+  check1_vnx16hi ();
+  check1_vnx32hi ();
+  check1_vnx64hi ();
+  check1_vnx4si ();
+  check1_vnx8si ();
+  check1_vnx16si ();
+  check1_vnx32si ();
+  check1_vnx4di ();
+  check1_vnx8di ();
+  check1_vnx16di ();
+  check1_vnx4sf ();
+  check1_vnx8sf ();
+  check1_vnx16sf ();
+  check1_vnx32sf ();
+  check1_vnx4df ();
+  check1_vnx8df ();
+  check1_vnx16df ();
+  check2_vnx4qi ();
+  check2_vnx8qi ();
+  check2_vnx16qi ();
+  check2_vnx32qi ();
+  check2_vnx64qi ();
+  check2_vnx4hi ();
+  check2_vnx8hi ();
+  check2_vnx16hi ();
+  check2_vnx32hi ();
+  check2_vnx64hi ();
+  check2_vnx4si ();
+  check2_vnx8si ();
+  check2_vnx16si ();
+  check2_vnx32si ();
+  check2_vnx4di ();
+  check2_vnx8di ();
+  check2_vnx16di ();
+  check2_vnx4sf ();
+  check2_vnx8sf ();
+  check2_vnx16sf ();
+  check2_vnx32sf ();
+  check2_vnx4df ();
+  check2_vnx8df ();
+  check2_vnx16df ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c
new file mode 100644
index 0000000..6db4c5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m8" } */
+
+#include "perm.h"
+
+#define MASKE_2(x, y) (x), (x + 2)
+#define MASKE_4(x, y) MASKE_2 (x, y), MASKE_2 (x + 4, y)
+#define MASKE_8(x, y) MASKE_4 (x, y), MASKE_4 (x + 8, y)
+#define MASKE_16(x, y) MASKE_8 (x, y), MASKE_8 (x + 16, y)
+#define MASKE_32(x, y) MASKE_16 (x, y), MASKE_16 (x + 32, y)
+#define MASKE_64(x, y) MASKE_32 (x, y), MASKE_32 (x + 64, y)
+
+#define MASKO_2(x, y) (x + 1), (x + 3)
+#define MASKO_4(x, y) MASKO_2 (x, y), MASKO_2 (x + 4, y)
+#define MASKO_8(x, y) MASKO_4 (x, y), MASKO_4 (x + 8, y)
+#define MASKO_16(x, y) MASKO_8 (x, y), MASKO_8 (x + 16, y)
+#define MASKO_32(x, y) MASKO_16 (x, y), MASKO_16 (x + 32, y)
+#define MASKO_64(x, y) MASKO_32 (x, y), MASKO_32 (x + 64, y)
+
+#define PERMUTE1(TYPE, NUNITS) \
+  __attribute__ ((noipa)) void permute1_##TYPE (TYPE values1, TYPE values2, \
+						TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASKE_##NUNITS (0, NUNITS)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE2(TYPE, NUNITS) \
+  __attribute__ ((noipa)) void permute2_##TYPE (TYPE values1, TYPE values2, \
+						TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASKO_##NUNITS (0, NUNITS)); \
+    *(TYPE *) out = v; \
+  }
+
+#define TEST_ALL(T) \
+  T (vnx4qi, 4) \
+  T (vnx8qi, 8) \
+  T (vnx16qi, 16) \
+  T (vnx32qi, 32) \
+  T (vnx64qi, 64) \
+  T (vnx4hi, 4) \
+  T (vnx8hi, 8) \
+  T (vnx16hi, 16) \
+  T (vnx32hi, 32) \
+  T (vnx64hi, 64) \
+  T (vnx4si, 4) \
+  T (vnx8si, 8) \
+  T (vnx16si, 16) \
+  T (vnx32si, 32) \
+  T (vnx4di, 4) \
+  T (vnx8di, 8) \
+  T (vnx16di, 16) \
+  T (vnx4sf, 4) \
+  T (vnx8sf, 8) \
+  T (vnx16sf, 16) \
+  T (vnx32sf, 32) \
+  T (vnx4df, 4) \
+  T (vnx8df, 8) \
+  T (vnx16df, 16)
+
+TEST_ALL (PERMUTE1)
+TEST_ALL (PERMUTE2)
+
+/* { dg-final { scan-assembler-times "vslideup" 48 } } */
+/* { dg-final { scan-assembler-times "vcompress" 96 } } */
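For reference (not part of the patch): TEST_ALL instantiates 24 vector types for each of PERMUTE1 and PERMUTE2, i.e. 48 permutation functions in total.  With the new lowering each permutation is expected to use two vcompress and one vslideup, which is what the scan-assembler counts above check: 48 * 2 = 96 vcompress and 48 * 1 = 48 vslideup.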