aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-11-22 11:27:52 +0800
committerLehua Ding <lehua.ding@rivai.ai>2023-11-22 14:30:28 +0800
commitd13e59b86c5cbeec6135ada3f6dc983289cac610 (patch)
treec3bf0425e33aa9be8ff2a6a237acd4c9ebb8a840
parent2e51fff7ce02a6105aa1acff57cbbdd8a767a33f (diff)
downloadgcc-d13e59b86c5cbeec6135ada3f6dc983289cac610.zip
gcc-d13e59b86c5cbeec6135ada3f6dc983289cac610.tar.gz
gcc-d13e59b86c5cbeec6135ada3f6dc983289cac610.tar.bz2
RISC-V: Fix permutation indice mode bug
This patch fixes following FAILs on zvl512b: FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-1.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-1.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-16.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-16.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-17.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-17.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-3.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-3.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-5.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-5.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-6.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/slp_run-6.c execution test The root cause is that we are using vrgather.vv on vector QI mode which is incorrect for zvl512b since it exceed 256. Instead, we should use vrgatherei16.vv PR target/112598 gcc/ChangeLog: * config/riscv/riscv-v.cc (emit_vlmax_gather_insn): Adapt the priority. (shuffle_generic_patterns): Fix permutation indice bug. * config/riscv/vector-iterators.md: Fix VEI16 bug. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr112598-2.c: New test.
-rw-r--r--gcc/config/riscv/riscv-v.cc31
-rw-r--r--gcc/config/riscv/vector-iterators.md14
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112598-2.c24
3 files changed, 54 insertions, 15 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 541dffb..7d6d082 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -839,13 +839,13 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
insn_code icode;
machine_mode data_mode = GET_MODE (target);
machine_mode sel_mode = GET_MODE (sel);
- if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
- icode = code_for_pred_gatherei16 (data_mode);
- else if (const_vec_duplicate_p (sel, &elt))
+ if (const_vec_duplicate_p (sel, &elt))
{
icode = code_for_pred_gather_scalar (data_mode);
sel = elt;
}
+ else if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
+ icode = code_for_pred_gatherei16 (data_mode);
else if (CONST_VECTOR_P (sel)
&& GET_MODE_BITSIZE (GET_MODE_INNER (sel_mode)) > 16
&& riscv_get_v_regno_alignment (data_mode) > 1)
@@ -3261,11 +3261,26 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d)
if (!pow2p_hwi (d->perm.encoding().npatterns ()))
return false;
- /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv.
- Otherwise, it could overflow the index range. */
- if (!nunits.is_constant () && GET_MODE_INNER (d->vmode) == QImode
- && !get_vector_mode (HImode, nunits).exists (&sel_mode))
- return false;
+ if (GET_MODE_INNER (d->vmode) == QImode)
+ {
+ if (nunits.is_constant ())
+ {
+ /* If indice is LMUL8 CONST_VECTOR and any element value
+ exceed the range of 0 ~ 255, Forbid such permutation
+ since we need vector HI mode to hold such indice and
+ we don't have it. */
+ if (!d->perm.all_in_range_p (0, 255)
+ && !get_vector_mode (HImode, nunits).exists (&sel_mode))
+ return false;
+ }
+ else
+ {
+ /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv.
+ Otherwise, it could overflow the index range. */
+ if (!get_vector_mode (HImode, nunits).exists (&sel_mode))
+ return false;
+ }
+ }
/* Success! */
if (d->testing_p)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 1018730..27dae10 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -376,13 +376,13 @@
(V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
(V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
(V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
- (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
- (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
- (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128")
- (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256")
- (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512")
- (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024")
- (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048")
+ (V32QI "riscv_vector::vls_mode_valid_p (V32QImode) && TARGET_MIN_VLEN >= 64")
+ (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 128")
+ (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 256")
+ (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 512")
+ (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 1024")
+ (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 2048")
+ (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 4096")
(V1HI "riscv_vector::vls_mode_valid_p (V1HImode)")
(V2HI "riscv_vector::vls_mode_valid_p (V2HImode)")
(V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112598-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112598-2.c
new file mode 100644
index 0000000..d32e8ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112598-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zfh_zvl512b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */
+
+#include <stdint-gcc.h>
+
+void
+f (uint8_t *restrict a, uint8_t *restrict b, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = b[i * 8 + 3] + 1;
+ a[i * 8 + 1] = b[i * 8 + 2] + 2;
+ a[i * 8 + 2] = b[i * 8 + 1] + 3;
+ a[i * 8 + 3] = b[i * 8 + 0] + 4;
+ a[i * 8 + 4] = b[i * 8 + 7] + 5;
+ a[i * 8 + 5] = b[i * 8 + 6] + 6;
+ a[i * 8 + 6] = b[i * 8 + 5] + 7;
+ a[i * 8 + 7] = b[i * 8 + 4] + 8;
+ }
+}
+
+/* We don't want EEW8 LMUL8 vrgather.vv. */
+/* { dg-final { scan-assembler-not {vrgather\.vv} } } */
+