aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-12-20 14:55:26 +0800
committerPan Li <pan2.li@intel.com>2023-12-21 07:06:12 +0800
commit008b80e42eb7cb55c6a2ef55728241b8733dfd17 (patch)
treebb3a4cf1b625b83cd8ff92b4a966a0f44ca47772
parentd82bb518fa372cc30cc3352e0a124d0bd6deb36f (diff)
downloadgcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.zip
gcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.tar.gz
gcc-008b80e42eb7cb55c6a2ef55728241b8733dfd17.tar.bz2
RISC-V: Optimize SELECT_VL codegen when length is known as smaller than VF
While trying to fix bugs of PR113097, I noticed that in the following situation we generate a redundant vsetvli: _255 = SELECT_VL (3, POLY_INT_CST [4, 4]); COND_LEN (..., _255) Before this patch: vsetivli a5, 3... ... vadd.vv (use a5) After this patch: ... vadd.vv (use AVL = 3) The reason we can do this is that known_le (3, [4,4]) is true, so it is safe to apply this optimization. Tested on both RV32 and RV64 with full coverage testing, no regression. PR target/113087 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_select_vl): Optimize SELECT_VL. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr113087-2.c: New test.
-rw-r--r--gcc/config/riscv/riscv-v.cc10
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c61
2 files changed, 71 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 3b9be25..038ab08 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3750,6 +3750,16 @@ void
expand_select_vl (rtx *ops)
{
poly_int64 nunits = rtx_to_poly_int64 (ops[2]);
+ if (CONST_INT_P (ops[1]) && known_le (INTVAL (ops[1]), nunits))
+ {
+ /* If length is known <= VF, we just use the length directly instead
+ of using vsetvli.
+
+ E.g. _255 = .SELECT_VL (3, POLY_INT_CST [4, 4]);
+ We move 3 into _255 instead of using an explicit vsetvl. */
+ emit_move_insn (ops[0], ops[1]);
+ return;
+ }
/* We arbitrary picked QImode as inner scalar mode to get vector mode.
since vsetvl only demand ratio. We let VSETVL PASS to optimize it. */
scalar_int_mode mode = QImode;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c
new file mode 100644
index 0000000..836260f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113087-2.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include <assert.h>
+int (e) (int g, int h) { return h > 0x10 || g > 0xFFFFFFFF >> h ? g : g << h; }
+struct i
+{
+ int j;
+ int l : 1;
+};
+struct m
+{
+ char k;
+ int n;
+};
+char o;
+char p;
+short s;
+int q;
+struct m r;
+int v;
+int t;
+short z;
+long ac;
+int ad;
+int ae;
+
+static void
+ai (struct i bf)
+{
+ for (; v; v++)
+ r.k = 0;
+ do
+ ac ^= bf.j;
+ while (bf.j < 0);
+ s = 0;
+ if (bf.l)
+ q |= 0x800;
+}
+
+int
+main ()
+{
+ struct i aw = {0xE00, 1};
+ o = 4;
+ s = p;
+ ai (aw);
+ t = 1;
+ ++p;
+ for (; t <= 7; t++)
+ {
+ ad &= 1;
+ (o &= 1 - e (0x40000012, ++ae)) & (z |= 1);
+ }
+ for (; r.n;)
+ ;
+ assert (o == 4);
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not {vsetivli\s+[a-x0-9]+,\s*3} } } */