author     Robin Dapp <rdapp@ventanamicro.com>  2024-10-17 11:33:19 +0200
committer  Robin Dapp <rdapp@ventanamicro.com>  2024-12-13 10:12:40 +0100
commit     528567a7b1589735408eaa133206a0683162188e
tree       91305cc91400531b94f1b0c5e2a20d053478db56
parent     cff3050a4fbec323629563b87c9a83bf3e7be908
RISC-V: Add even/odd vec_perm_const pattern.
This adds handling for even/odd patterns.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (shuffle_even_odd_patterns): New function.
	(expand_vec_perm_const_1): Use new function.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c: New test.
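As an illustration (not part of the commit message): the new handler targets two-source shuffles whose selector picks every even or every odd element of the concatenated inputs, which is what the testsuite below expresses via __builtin_shufflevector. Assuming a hypothetical 4-element int vector type and function name, such a permute looks like:

typedef int v4si __attribute__ ((vector_size (16)));

/* Even elements of the concatenation of a and b: indices [0 2 4 6].  */
v4si
even_perm (v4si a, v4si b)
{
  return __builtin_shufflevector (a, b, 0, 2, 4, 6);
}

The odd variant uses indices [1 3 5 7] and is recognized the same way.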
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/riscv/riscv-v.cc                                                   66
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c   122
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c        68
3 files changed, 256 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d58632b..517a016 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3572,6 +3572,70 @@ shuffle_interleave_patterns (struct expand_vec_perm_d *d)
return true;
}
+
+/* Recognize even/odd patterns like [0 2 4 6]. We use two compress
+ and one slideup. */
+
+static bool
+shuffle_even_odd_patterns (struct expand_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ poly_int64 vec_len = d->perm.length ();
+ int n_patterns = d->perm.encoding ().npatterns ();
+
+ if (n_patterns != 1)
+ return false;
+
+ if (!vec_len.is_constant ())
+ return false;
+
+ int vlen = vec_len.to_constant ();
+ if (vlen < 4 || vlen > 64)
+ return false;
+
+ if (d->one_vector_p)
+ return false;
+
+ bool even = true;
+ if (!d->perm.series_p (0, 1, 0, 2))
+ {
+ even = false;
+ if (!d->perm.series_p (0, 1, 1, 2))
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ machine_mode mask_mode = get_mask_mode (vmode);
+ rvv_builder builder (mask_mode, vlen, 1);
+ int bit = even ? 0 : 1;
+ for (int i = 0; i < vlen; i++)
+ {
+ bit ^= 1;
+ if (bit)
+ builder.quick_push (CONST1_RTX (BImode));
+ else
+ builder.quick_push (CONST0_RTX (BImode));
+ }
+ rtx mask = force_reg (mask_mode, builder.build ());
+
+ insn_code icode = code_for_pred_compress (vmode);
+ rtx ops1[] = {d->target, d->op0, mask};
+ emit_vlmax_insn (icode, COMPRESS_OP, ops1);
+
+ rtx tmp2 = gen_reg_rtx (vmode);
+ rtx ops2[] = {tmp2, d->op1, mask};
+ emit_vlmax_insn (icode, COMPRESS_OP, ops2);
+
+ rtx ops[] = {d->target, d->target, tmp2, gen_int_mode (vlen / 2, Pmode)};
+ icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+ emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
+
+ return true;
+}
+
/* Recognize decompress patterns:
1. VEC_PERM_EXPR op0 and op1
@@ -3890,6 +3954,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
if (shuffle_interleave_patterns (d))
return true;
+ if (shuffle_even_odd_patterns (d))
+ return true;
if (shuffle_compress_patterns (d))
return true;
if (shuffle_decompress_patterns (d))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
new file mode 100644
index 0000000..ff1ffa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
@@ -0,0 +1,122 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */
+
+#include "shuffle-evenodd.c"
+
+#define SERIES_2(x, y) (x), (x + 1)
+#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y)
+#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y)
+#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y)
+#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y)
+#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y)
+
+#define comp(a, b, n) \
+ for (unsigned i = 0; i < n; ++i) \
+ if ((a)[i] != (b)[i]) \
+ __builtin_abort ();
+
+#define CHECK1(TYPE, NUNITS) \
+ __attribute__ ((noipa)) void check1_##TYPE () \
+ { \
+ TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+ TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+ TYPE ref = (TYPE){MASKE_##NUNITS (0, NUNITS)}; \
+ TYPE res; \
+ permute1_##TYPE (v0, v1, &res); \
+ comp (res, ref, NUNITS); \
+ }
+
+#define CHECK2(TYPE, NUNITS) \
+ __attribute__ ((noipa)) void check2_##TYPE () \
+ { \
+ TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+ TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+ TYPE ref = (TYPE){MASKO_##NUNITS (0, NUNITS)}; \
+ TYPE res; \
+ permute2_##TYPE (v0, v1, &res); \
+ comp (res, ref, NUNITS); \
+ }
+
+#define CHECK_ALL(T) \
+ T (vnx4qi, 4) \
+ T (vnx8qi, 8) \
+ T (vnx16qi, 16) \
+ T (vnx32qi, 32) \
+ T (vnx64qi, 64) \
+ T (vnx4hi, 4) \
+ T (vnx8hi, 8) \
+ T (vnx16hi, 16) \
+ T (vnx32hi, 32) \
+ T (vnx64hi, 64) \
+ T (vnx4si, 4) \
+ T (vnx8si, 8) \
+ T (vnx16si, 16) \
+ T (vnx32si, 32) \
+ T (vnx4di, 4) \
+ T (vnx8di, 8) \
+ T (vnx16di, 16) \
+ T (vnx4sf, 4) \
+ T (vnx8sf, 8) \
+ T (vnx16sf, 16) \
+ T (vnx32sf, 32) \
+ T (vnx4df, 4) \
+ T (vnx8df, 8) \
+ T (vnx16df, 16)
+
+CHECK_ALL (CHECK1)
+CHECK_ALL (CHECK2)
+
+int
+main ()
+{
+ check1_vnx4qi ();
+ check1_vnx8qi ();
+ check1_vnx16qi ();
+ check1_vnx32qi ();
+ check1_vnx64qi ();
+ check1_vnx4hi ();
+ check1_vnx8hi ();
+ check1_vnx16hi ();
+ check1_vnx32hi ();
+ check1_vnx64hi ();
+ check1_vnx4si ();
+ check1_vnx8si ();
+ check1_vnx16si ();
+ check1_vnx32si ();
+ check1_vnx4di ();
+ check1_vnx8di ();
+ check1_vnx16di ();
+ check1_vnx4sf ();
+ check1_vnx8sf ();
+ check1_vnx16sf ();
+ check1_vnx32sf ();
+ check1_vnx4df ();
+ check1_vnx8df ();
+ check1_vnx16df ();
+ check2_vnx4qi ();
+ check2_vnx8qi ();
+ check2_vnx16qi ();
+ check2_vnx32qi ();
+ check2_vnx64qi ();
+ check2_vnx4hi ();
+ check2_vnx8hi ();
+ check2_vnx16hi ();
+ check2_vnx32hi ();
+ check2_vnx64hi ();
+ check2_vnx4si ();
+ check2_vnx8si ();
+ check2_vnx16si ();
+ check2_vnx32si ();
+ check2_vnx4di ();
+ check2_vnx8di ();
+ check2_vnx16di ();
+ check2_vnx4sf ();
+ check2_vnx8sf ();
+ check2_vnx16sf ();
+ check2_vnx32sf ();
+ check2_vnx4df ();
+ check2_vnx8df ();
+ check2_vnx16df ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c
new file mode 100644
index 0000000..6db4c5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m8" } */
+
+#include "perm.h"
+
+#define MASKE_2(x, y) (x), (x + 2)
+#define MASKE_4(x, y) MASKE_2 (x, y), MASKE_2 (x + 4, y)
+#define MASKE_8(x, y) MASKE_4 (x, y), MASKE_4 (x + 8, y)
+#define MASKE_16(x, y) MASKE_8 (x, y), MASKE_8 (x + 16, y)
+#define MASKE_32(x, y) MASKE_16 (x, y), MASKE_16 (x + 32, y)
+#define MASKE_64(x, y) MASKE_32 (x, y), MASKE_32 (x + 64, y)
+
+#define MASKO_2(x, y) (x + 1), (x + 3)
+#define MASKO_4(x, y) MASKO_2 (x, y), MASKO_2 (x + 4, y)
+#define MASKO_8(x, y) MASKO_4 (x, y), MASKO_4 (x + 8, y)
+#define MASKO_16(x, y) MASKO_8 (x, y), MASKO_8 (x + 16, y)
+#define MASKO_32(x, y) MASKO_16 (x, y), MASKO_16 (x + 32, y)
+#define MASKO_64(x, y) MASKO_32 (x, y), MASKO_32 (x + 64, y)
+
+#define PERMUTE1(TYPE, NUNITS) \
+ __attribute__ ((noipa)) void permute1_##TYPE (TYPE values1, TYPE values2, \
+ TYPE *out) \
+ { \
+ TYPE v = __builtin_shufflevector (values1, values2, \
+ MASKE_##NUNITS (0, NUNITS)); \
+ *(TYPE *) out = v; \
+ }
+
+#define PERMUTE2(TYPE, NUNITS) \
+ __attribute__ ((noipa)) void permute2_##TYPE (TYPE values1, TYPE values2, \
+ TYPE *out) \
+ { \
+ TYPE v = __builtin_shufflevector (values1, values2, \
+ MASKO_##NUNITS (0, NUNITS)); \
+ *(TYPE *) out = v; \
+ }
+
+#define TEST_ALL(T) \
+ T (vnx4qi, 4) \
+ T (vnx8qi, 8) \
+ T (vnx16qi, 16) \
+ T (vnx32qi, 32) \
+ T (vnx64qi, 64) \
+ T (vnx4hi, 4) \
+ T (vnx8hi, 8) \
+ T (vnx16hi, 16) \
+ T (vnx32hi, 32) \
+ T (vnx64hi, 64) \
+ T (vnx4si, 4) \
+ T (vnx8si, 8) \
+ T (vnx16si, 16) \
+ T (vnx32si, 32) \
+ T (vnx4di, 4) \
+ T (vnx8di, 8) \
+ T (vnx16di, 16) \
+ T (vnx4sf, 4) \
+ T (vnx8sf, 8) \
+ T (vnx16sf, 16) \
+ T (vnx32sf, 32) \
+ T (vnx4df, 4) \
+ T (vnx8df, 8) \
+ T (vnx16df, 16)
+
+TEST_ALL (PERMUTE1)
+TEST_ALL (PERMUTE2)
+
+/* { dg-final { scan-assembler-times "vslideup" 48 } } */
+/* { dg-final { scan-assembler-times "vcompress" 96 } } */
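For reference, a minimal C sketch (hypothetical, not part of the commit) that emulates the expansion strategy on plain arrays: each input is compressed under an alternating 1 0 1 0 ... mask, and the compressed second input is then slid up by vlen/2 into the upper half of the result, which reproduces the even-element permute the tests check.

#include <assert.h>

#define VLEN 8

/* Emulate vcompress: pack a[i] to the front of dst wherever mask[i] is set.  */
static int
compress (const int *a, const int *mask, int *dst)
{
  int n = 0;
  for (int i = 0; i < VLEN; i++)
    if (mask[i])
      dst[n++] = a[i];
  return n;
}

int
main (void)
{
  int a[VLEN] = {0, 1, 2, 3, 4, 5, 6, 7};
  int b[VLEN] = {8, 9, 10, 11, 12, 13, 14, 15};
  int mask[VLEN], res[VLEN], tmp[VLEN];

  /* Alternating mask 1 0 1 0 ... selects the even elements.  */
  for (int i = 0; i < VLEN; i++)
    mask[i] = (i % 2) == 0;

  compress (a, mask, res);	/* res = 0 2 4 6 in the low half.  */
  compress (b, mask, tmp);	/* tmp = 8 10 12 14 in the low half.  */

  /* Emulate the slideup by VLEN/2: merge tmp into the upper half.  */
  for (int i = 0; i < VLEN / 2; i++)
    res[VLEN / 2 + i] = tmp[i];

  /* The result is the even permute [0 2 4 6 8 10 12 14].  */
  for (int i = 0; i < VLEN; i++)
    assert (res[i] == 2 * i);

  return 0;
}

The odd case only differs in the initial mask (0 1 0 1 ...), matching the even/odd choice made by shuffle_even_odd_patterns above.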