aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Dapp <rdapp@ventanamicro.com>2024-09-26 11:56:08 +0200
committerRobin Dapp <rdapp@ventanamicro.com>2024-11-19 12:23:12 +0100
commita18592e1c30f0f539c71fa632c49cb82008ec45a (patch)
treeabf6f2a6646e1b2e1ef14d4d68d906213a50dbcc
parent387dba05e4207fc3f9a2f2bcb09a343a999c76fc (diff)
downloadgcc-a18592e1c30f0f539c71fa632c49cb82008ec45a.zip
gcc-a18592e1c30f0f539c71fa632c49cb82008ec45a.tar.gz
gcc-a18592e1c30f0f539c71fa632c49cb82008ec45a.tar.bz2
RISC-V: Load VLS perm indices directly from memory.
Instead of loading the permutation indices and using vmslt in order to determine which elements belong to which source vector we can compute the proper mask at compile time. That way we can emit vlm instead of vle + vmslt. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS indices directly. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and no vmsleu etc. * gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto.
-rw-r--r--gcc/config/riscv/riscv-v.cc22
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c2
7 files changed, 32 insertions, 2 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a0e22b6..ee7a012 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3101,9 +3101,27 @@ shuffle_merge_patterns (struct expand_vec_perm_d *d)
machine_mode mask_mode = get_mask_mode (vmode);
rtx mask = gen_reg_rtx (mask_mode);
- if (indices_fit_selector_p)
+ if (indices_fit_selector_p && vec_len.is_constant ())
{
- /* MASK = SELECTOR < NUNITS ? 1 : 0. */
+ /* For a constant vector length we can generate the needed mask at
+ compile time and load it as a mask at runtime.
+ This saves a compare at runtime. */
+ rtx_vector_builder sel (mask_mode, d->perm.encoding ().npatterns (),
+ d->perm.encoding ().nelts_per_pattern ());
+ unsigned int encoded_nelts = sel.encoded_nelts ();
+ for (unsigned int i = 0; i < encoded_nelts; i++)
+ sel.quick_push (gen_int_mode (d->perm[i].to_constant ()
+ < vec_len.to_constant (),
+ GET_MODE_INNER (mask_mode)));
+ mask = sel.build ();
+ }
+ else if (indices_fit_selector_p)
+ {
+ /* For a dynamic vector length < 256 we keep the permutation
+ indices in the literal pool, load them at runtime and create the
+ mask that selects either OP0 or OP1 via
+
+ INDICES < NUNITS ? 1 : 0. */
rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
insn_code icode = code_for_pred_cmp_scalar (sel_mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
index cd24922..c34734c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-1.c"
/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
index 52d9124..68f7b62 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-2.c"
/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
index 4931d2a..1250dca 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-3.c"
/* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
index f22a18f..1dfd828 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-4.c"
/* dg-final scan-assembler-times {\tvmerge.vvm} 11 */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
index cf8d04c..af84a65 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-5.c"
/* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
index 3b6f977..45e9998 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
@@ -4,3 +4,5 @@
#include "../vls-vlmax/merge-6.c"
/* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 5 } } */