From 4db744149b6ed46314107187bb25e142c729f442 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 19:58:43 +0800 Subject: RISC-V: Robustify shuffle index used by vrgather and fix regressions Note that there are some regression FAILs: FAIL: gcc.target/riscv/rvv/autovec/pr110950.c -O3 -ftree-vectorize scan-assembler-times vslide1up\\.vx 1 FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c -std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax scan-assembler-times vrgather\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 19 FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c -std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax scan-assembler-times vrgatherei16\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 12 FAIL: gcc.target/riscv/rvv/autovec/vls/perm-4.c -O3 -ftree-vectorize --param riscv-autovec-preference=scalable scan-assembler-times vrgather\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 19 FAIL: gcc.target/riscv/rvv/autovec/vls/perm-4.c -O3 -ftree-vectorize --param riscv-autovec-preference=scalable scan-assembler-times vrgatherei16\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 12 The pr110950 FAIL is not a real regression; adapting the testcase is enough. The remaining FAILs, which are caused by this patch: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d9dd06ad51b7479f09acb88adf404664a1e18b2a need to be fixed. Robustify the gather index to fix those FAILs. gcc/ChangeLog: * config/riscv/riscv-v.cc (get_gather_index_mode): New function. (shuffle_series_patterns): Robustify shuffle index. (shuffle_generic_patterns): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr110950.c: Adapt test. 
--- gcc/config/riscv/riscv-v.cc | 80 +++++++++++++--------- .../gcc.target/riscv/rvv/autovec/pr110950.c | 2 +- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 484c690..944b37b 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2923,6 +2923,39 @@ struct expand_vec_perm_d bool testing_p; }; +/* Return the appropriate index mode for gather instructions. */ +opt_machine_mode +get_gather_index_mode (struct expand_vec_perm_d *d) +{ + machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); + poly_uint64 nunits = GET_MODE_NUNITS (d->vmode); + + if (GET_MODE_INNER (d->vmode) == QImode) + { + if (nunits.is_constant ()) + { + /* If indice is LMUL8 CONST_VECTOR and any element value + exceed the range of 0 ~ 255, Forbid such permutation + since we need vector HI mode to hold such indice and + we don't have it. */ + if (!d->perm.all_in_range_p (0, 255) + && !get_vector_mode (HImode, nunits).exists (&sel_mode)) + return opt_machine_mode (); + } + else + { + /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv. + Otherwise, it could overflow the index range. */ + if (!get_vector_mode (HImode, nunits).exists (&sel_mode)) + return opt_machine_mode (); + } + } + else if (riscv_get_v_regno_alignment (sel_mode) > 1 + && GET_MODE_INNER (sel_mode) != HImode) + sel_mode = get_vector_mode (HImode, nunits).require (); + return sel_mode; +} + /* Recognize the patterns that we can use merge operation to shuffle the vectors. The value of Each element (index i) in selector can only be either i or nunits + i. We will check the pattern is actually monotonic. @@ -3428,12 +3461,10 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) if (!have_series) return false; - /* Get a vector int-mode to be used for the permute selector. 
*/ - machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); - insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode); - - /* We need to be able to insert an element and shift the vector. */ - if (need_insert && icode == CODE_FOR_nothing) + /* Disable shuffle if we can't find an appropriate integer index mode for + gather. */ + machine_mode sel_mode; + if (!get_gather_index_mode (d).exists (&sel_mode)) return false; /* Success! */ @@ -3448,7 +3479,12 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) /* Insert the remaining element if necessary. */ if (need_insert) - emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode))); + { + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDE1UP, sel_mode); + rtx ops[] + = {series, series, gen_int_mode (el1, GET_MODE_INNER (sel_mode))}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } emit_vlmax_gather_insn (d->target, d->op0, series); @@ -3460,36 +3496,16 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) static bool shuffle_generic_patterns (struct expand_vec_perm_d *d) { - machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); - poly_uint64 nunits = GET_MODE_NUNITS (d->vmode); + machine_mode sel_mode; /* We don't enable SLP for non-power of 2 NPATTERNS. */ if (!pow2p_hwi (d->perm.encoding().npatterns ())) return false; - if (GET_MODE_INNER (d->vmode) == QImode) - { - if (nunits.is_constant ()) - { - /* If indice is LMUL8 CONST_VECTOR and any element value - exceed the range of 0 ~ 255, Forbid such permutation - since we need vector HI mode to hold such indice and - we don't have it. */ - if (!d->perm.all_in_range_p (0, 255) - && !get_vector_mode (HImode, nunits).exists (&sel_mode)) - return false; - } - else - { - /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv. - Otherwise, it could overflow the index range. 
*/ - if (!get_vector_mode (HImode, nunits).exists (&sel_mode)) - return false; - } - } - else if (riscv_get_v_regno_alignment (sel_mode) > 1 - && GET_MODE_INNER (sel_mode) != HImode) - sel_mode = get_vector_mode (HImode, nunits).require (); + /* Disable shuffle if we can't find an appropriate integer index mode for + gather. */ + if (!get_gather_index_mode (d).exists (&sel_mode)) + return false; /* Success! */ if (d->testing_p) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c index b927f1e..17dd439 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c @@ -9,4 +9,4 @@ void b() { c[a] = d[-a]; } -/* { dg-final { scan-assembler-times {vslide1up\.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vrgather} 1 } } */ -- cgit v1.1