aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-07-03 20:33:42 +0800
committerPan Li <pan2.li@intel.com>2023-07-04 10:48:37 +0800
commitc26f275861378cb4e339563b22e06cc8e2dc8120 (patch)
treef5d21be0437d71f6741b934687d792110d58b9af /gcc
parent48c85cb5334df621a05b322661089dd08be27094 (diff)
downloadgcc-c26f275861378cb4e339563b22e06cc8e2dc8120.zip
gcc-c26f275861378cb4e339563b22e06cc8e2dc8120.tar.gz
gcc-c26f275861378cb4e339563b22e06cc8e2dc8120.tar.bz2
RISC-V: Optimize local AVL propagation
I recently noticed that current VSETVL pass has a unnecessary restriction on local AVL propgation. Consider this following case: + insn 1: vsetvli a5,a3,e8,mf4,ta,mu + insn 2: vsetvli zero,a5,e32,m1,ta,ma + ... + vle32.v v1,0(a1) + vsetvli a2,zero,e32,m1,ta,ma + vadd.vv v1,v1,v1 + vsetvli zero,a5,e32,m1,ta,ma + vse32.v v1,0(a0) + ... + insn 3: sub a3,a3,a5 + ... We failed to elide insn 2 (vsetvl insn) since insn 3 is modifying "a3" AVL. Actually, we don't really care about insn 3 since we should only check and make sure there is no insn between insn 1 and insn 2 that modifies "a3" AVL. Then, we can propgate AVL "a3" from insn 1 to insn 2. Finally, insn 2 is eliminated. After this patch: + insn 1: vsetvli a5,a3,e8,mf4,ta,ma + ... + vle32.v v1,0(a1) + vsetvli a2,zero,e32,m1,ta,ma + vadd.vv v1,v1,v1 + vsetvli zero,a5,e32,m1,ta,ma + vse32.v v1,0(a0) + ... + insn 3: sub a3,a3,a5 + ... gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (vector_insn_info::parse_insn): Add early break. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_prop-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/riscv-vsetvl.cc22
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c21
2 files changed, 43 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 2d576e8..ab47901 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2025,6 +2025,28 @@ vector_insn_info::parse_insn (insn_info *insn)
real_insn_and_same_bb_p (i, get_insn ()->bb ());
i = i->next_nondebug_insn ())
{
+ /* Consider this following sequence:
+
+ insn 1: vsetvli a5,a3,e8,mf4,ta,mu
+ insn 2: vsetvli zero,a5,e32,m1,ta,ma
+ ...
+ vle32.v v1,0(a1)
+ vsetvli a2,zero,e32,m1,ta,ma
+ vadd.vv v1,v1,v1
+ vsetvli zero,a5,e32,m1,ta,ma
+ vse32.v v1,0(a0)
+ ...
+ insn 3: sub a3,a3,a5
+ ...
+
+ We can local AVL propagate "a3" from insn 1 to insn 2
+ if no insns between insn 1 and insn 2 modify "a3 even
+ though insn 3 modifies "a3".
+ Otherwise, we can't perform local AVL propagation.
+
+ Early break if we reach the insn 2. */
+ if (!before_p (i, insn))
+ break;
if (find_access (i->defs (), REGNO (new_info.get_avl ())))
{
modified_p = true;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c
new file mode 100644
index 0000000..19ea0f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *a, void *b, void *c, size_t n)
+{
+ for (size_t vl; n > 0; n -= vl, a += vl, b += vl * 4, c += vl)
+ {
+ vl = __riscv_vsetvl_e8mf4 (n);
+ vint32m1_t vec_b = __riscv_vle32_v_i32m1 (b, vl);
+ vint32m1_t vec_a = __riscv_vadd_vv_i32m1 (vec_b, vec_b, __riscv_vsetvlmax_e32m1 ());
+ __riscv_vse32_v_i32m1 (a, vec_a, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */