RISC-V: Fix bug in VLA SLP auto-vectorization

Sorry for introducing a bug in the previous VLA SLP patch.

Consider the following permutation:

  _85 = VEC_PERM_EXPR <{ 99, 17, ... }, { 11, 80, ... }, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;

The correct result should be:

  _85 = { 99, 11, 17, 80, ... }

However, the previous patch handled this case incorrectly.

Code sequence before this patch:

  set mask = { 0, 1, 0, 1, ... }
  set v0 = { 99, 17, 99, 17, ... }
  set v1 = { 11, 80, 11, 80, ... }
  set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
  set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 99, 80 }

The result is incorrect.

After this patch:

  set mask = { 0, 1, 0, 1, ... }
  set index = viota (mask) = { 0, 0, 1, 1, 2, 2, ... }
  set v0 = vrgather ({ 99, 17, 99, 17, ... }, index) = { 99, 99, 17, 17, ... }
  set v1 = { 11, 80, 11, 80, ... }
  set result = vrgather_mu (v0, v1, index, mask) = { 99, 11, 17, 80 }

The result is what we expected.

This issue was discovered by the test added in this patch, when run with --param=riscv-autovec-lmul=2.

gcc/ChangeLog:

  * config/riscv/riscv-v.cc (emit_vlmax_decompress_insn): Fix bug.
  (shuffle_decompress_patterns): Ditto.

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/rvv/autovec/partial/slp-12.c: New test.
  * gcc.target/riscv/rvv/autovec/partial/slp_run-12.c: New test.
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-06-13 17:30:55 +0800
committer: Pan Li <pan2.li@intel.com> 2023-06-13 21:53:47 +0800
commit: cbe7f069e6a1a5c4f17019f4449da35a182775fc (patch)
tree: 2117a3239e5114cfdba65bb89e1ddec16f264851 /gcc/config
parent: 2270f4fdaa0331b5c7fa53baeb7fd0038639a73f (diff)
download: gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.zip
gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.gz
gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.bz2
1 files changed, 4 insertions, 4 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fb97034..34fdb53 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -863,7 +863,7 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
      e q r d c b v a  # v11 destination after vrgather using viota.m under mask
 */
 static void
-emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask)
+emit_vlmax_decompress_insn (rtx target, rtx op0, rtx op1, rtx mask)
 {
   machine_mode data_mode = GET_MODE (target);
   machine_mode sel_mode = related_int_vector_mode (data_mode).require ();
@@ -873,7 +873,8 @@ emit_vlmax_decompress_insn (rtx target, rtx op, rtx mask)
   rtx sel = gen_reg_rtx (sel_mode);
   rtx iota_ops[] = {sel, mask};
   emit_vlmax_insn (code_for_pred_iota (sel_mode), RVV_UNOP, iota_ops);
-  emit_vlmax_masked_gather_mu_insn (target, op, sel, mask);
+  emit_vlmax_gather_insn (target, op0, sel);
+  emit_vlmax_masked_gather_mu_insn (target, op1, sel, mask);
 }
 
 /* Emit merge instruction.  */
@@ -2441,8 +2442,7 @@ shuffle_decompress_patterns (struct expand_vec_perm_d *d)
   rtx const_vec = gen_const_vector_dup (sel_mode, 1);
   rtx mask = gen_reg_rtx (mask_mode);
   expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
-  emit_move_insn (d->target, op0);
-  emit_vlmax_decompress_insn (d->target, op1, mask);
+  emit_vlmax_decompress_insn (d->target, op0, op1, mask);
   return true;
 }
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>	2023-06-13 17:30:55 +0800
committer	Pan Li <pan2.li@intel.com>	2023-06-13 21:53:47 +0800
commit	cbe7f069e6a1a5c4f17019f4449da35a182775fc (patch)
tree	2117a3239e5114cfdba65bb89e1ddec16f264851 /gcc/config
parent	2270f4fdaa0331b5c7fa53baeb7fd0038639a73f (diff)
download	gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.zip gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.gz gcc-cbe7f069e6a1a5c4f17019f4449da35a182775fc.tar.bz2