diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-06-13 17:30:55 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-06-13 21:53:47 +0800 |
commit | cbe7f069e6a1a5c4f17019f4449da35a182775fc (patch) | |
tree | 2117a3239e5114cfdba65bb89e1ddec16f264851 /gcc/config | |
parent | 2270f4fdaa0331b5c7fa53baeb7fd0038639a73f (diff) | |
download | gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.zip gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.gz gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.bz2 |
RISC-V: Fix bug of VLA SLP auto-vectorization
Sorry for producing bugs in the previous VLA SLP patch.
Consider the following permutation:
_85 = VEC_PERM_EXPR <{ 99, 17, ... }, { 11, 80, ... }, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;
The correct result should be:
_85 = { 99, 11, 17, 80, ... }
However, the previous patch generated an incorrect code sequence for this permutation.
Code sequence before this patch:
set mask = { 0, 1, 0, 1, ... }
set v0 = { 99, 17, 99, 17, ... }
set v1 = { 11, 80, 11, 80, ... }
set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 99, 80, ... }
The result is incorrect.
After this patch:
set mask = { 0, 1, 0, 1, ... }
set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
set v0 = vrgather ({ 99, 17, 99, 17, ... }, index) = { 99, 99, 17, 17, ... }
set v1 = { 11, 80, 11, 80, ... }
set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 17, 80, ... }
The result is what we expected.
This issue is exposed by the test added in this patch when it is compiled with --param=riscv-autovec-lmul=2.
gcc/ChangeLog:
* config/riscv/riscv-v.cc (emit_vlmax_decompress_insn): Fix bug.
(shuffle_decompress_patterns): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/slp-12.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-12.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index fb97034..34fdb53 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -863,7 +863,7 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask) e q r d c b v a # v11 destination after vrgather using viota.m under mask */ static void -emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask) +emit_vlmax_decompress_insn (rtx target, rtx op0, rtx op1, rtx mask) { machine_mode data_mode = GET_MODE (target); machine_mode sel_mode = related_int_vector_mode (data_mode).require (); @@ -873,7 +873,8 @@ emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask) rtx sel = gen_reg_rtx (sel_mode); rtx iota_ops[] = {sel, mask}; emit_vlmax_insn (code_for_pred_iota (sel_mode), RVV_UNOP, iota_ops); - emit_vlmax_masked_gather_mu_insn (target, op, sel, mask); + emit_vlmax_gather_insn (target, op0, sel); + emit_vlmax_masked_gather_mu_insn (target, op1, sel, mask); } /* Emit merge instruction. */ @@ -2441,8 +2442,7 @@ shuffle_decompress_patterns (struct expand_vec_perm_d *d) rtx const_vec = gen_const_vector_dup (sel_mode, 1); rtx mask = gen_reg_rtx (mask_mode); expand_vec_cmp (mask, EQ, vid_repeat, const_vec); - emit_move_insn (d->target, op0); - emit_vlmax_decompress_insn (d->target, op1, mask); + emit_vlmax_decompress_insn (d->target, op0, op1, mask); return true; } |