diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-12-20 14:55:26 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-12-21 07:06:12 +0800 |
commit | 008b80e42eb7cb55c6a2ef55728241b8733dfd17 (patch) | |
tree | bb3a4cf1b625b83cd8ff92b4a966a0f44ca47772 | |
parent | d82bb518fa372cc30cc3352e0a124d0bd6deb36f (diff) | |
download | gcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.zip gcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.tar.gz gcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.tar.bz2 |
RISC-V: Optimize SELECT_VL codegen when length is known to be no larger than VF
While trying to fix the bugs of PR113097, I noticed that in the following
situation we generate a redundant vsetvli:
_255 = SELECT_VL (3, POLY_INT_CST [4, 4]);
COND_LEN (..., _255)
Before this patch:
vsetivli a5, 3...
...
vadd.vv (use a5)
After this patch:
...
vadd.vv (use AVL = 3)
We can do this because known_le (3, [4,4]) is true: the requested length
never exceeds VF, so it is safe to use the length directly as the AVL.
Tested with full-coverage testing on both RV32 and RV64; no regressions.
PR target/113087
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_select_vl): Optimize SELECT_VL.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr113087-2.c: New test.
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c | 61 |
2 files changed, 71 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 3b9be25..038ab08 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3750,6 +3750,16 @@ void expand_select_vl (rtx *ops) { poly_int64 nunits = rtx_to_poly_int64 (ops[2]); + if (CONST_INT_P (ops[1]) && known_le (INTVAL (ops[1]), nunits)) + { + /* If length is known <= VF, we just use the length directly instead + of using vsetvli. + + E.g. _255 = .SELECT_VL (3, POLY_INT_CST [4, 4]); + We move 3 into _255 intead of using explicit vsetvl. */ + emit_move_insn (ops[0], ops[1]); + return; + } /* We arbitrary picked QImode as inner scalar mode to get vector mode. since vsetvl only demand ratio. We let VSETVL PASS to optimize it. */ scalar_int_mode mode = QImode; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c new file mode 100644 index 0000000..836260f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include <assert.h> +int (e) (int g, int h) { return h > 0x10 || g > 0xFFFFFFFF >> h ? g : g << h; } +struct i +{ + int j; + int l : 1; +}; +struct m +{ + char k; + int n; +}; +char o; +char p; +short s; +int q; +struct m r; +int v; +int t; +short z; +long ac; +int ad; +int ae; + +static void +ai (struct i bf) +{ + for (; v; v++) + r.k = 0; + do + ac ^= bf.j; + while (bf.j < 0); + s = 0; + if (bf.l) + q |= 0x800; +} + +int +main () +{ + struct i aw = {0xE00, 1}; + o = 4; + s = p; + ai (aw); + t = 1; + ++p; + for (; t <= 7; t++) + { + ad &= 1; + (o &= 1 - e (0x40000012, ++ae)) & (z |= 1); + } + for (; r.n;) + ; + assert (o == 4); + return 0; +} + +/* { dg-final { scan-assembler-not {vsetivli\s+[a-x0-9]+,\s*3} } } */ |