aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-11-27 21:24:12 +0800
committerPan Li <pan2.li@intel.com>2023-11-28 10:10:55 +0800
commit9c16ca93641ad460a576a9ed7daf2aadf596193c (patch)
tree7d9b7ce61d637c004e7399bc311c31f17151304a
parent5099525bff4f7c36f289537bb15c582a8b51e2b3 (diff)
downloadgcc-9c16ca93641ad460a576a9ed7daf2aadf596193c.zip
gcc-9c16ca93641ad460a576a9ed7daf2aadf596193c.tar.gz
gcc-9c16ca93641ad460a576a9ed7daf2aadf596193c.tar.bz2
RISC-V: Fix VSETVL PASS regression
This patch is regression fix patch, not an optimization patch. Since trunk GCC generates redundant vsetvl than GCC-13. This is the case: bb 2: def a2 (vsetvl a2, zero) bb 3: use a2 bb 4: use a2 (vle) before this patch: bb 2: vsetvl a2 zero bb 3: vsetvl zero, zero ----> should be eliminated. bb 4: vle.v The root cause is we didn't set bb 3 as transparent since the incorrect codes. bb 3 didn't modify "a2" just use it, the VSETVL status from bb 2 can be available to bb 3 and bb 4: bb 2 -> bb 3 -> bb4. Another regression fix is anticipation calculation: bb 4: use a5 (sub) use a5 (vle) The vle VSETVL status should be considered as anticipated as long as both sub and vle a5 def are coming from same def. Tested on zvl128b no regression. I am going to test on zvl256/zvl512/zvl1024 PR target/112713 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Fix regression. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/pr112713-1.c: New test. * gcc.target/riscv/rvv/vsetvl/pr112713-2.c: New test.
-rw-r--r--gcc/config/riscv/riscv-vsetvl.cc29
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c24
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c47
3 files changed, 91 insertions, 9 deletions
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 74367ec..b3e07d4 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1433,9 +1433,23 @@ private:
inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
{
- return info.has_vl ()
- && (find_access (i->uses (), REGNO (info.get_vl ()))
- || find_access (i->defs (), REGNO (info.get_vl ())));
+ if (info.has_vl ())
+ {
+ if (find_access (i->defs (), REGNO (info.get_vl ())))
+ return true;
+ if (find_access (i->uses (), REGNO (info.get_vl ())))
+ {
+ resource_info resource = full_register (REGNO (info.get_vl ()));
+ def_lookup dl1 = crtl->ssa->find_def (resource, i);
+ def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+ if (dl1.matching_set () || dl2.matching_set ())
+ return true;
+ /* If their VLs are coming from same def, we still want to fuse
+ their VSETVL demand info to gain better performance. */
+ return dl1.prev_def (i) != dl2.prev_def (i);
+ }
+ }
+ return false;
}
inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
{
@@ -1702,7 +1716,7 @@ public:
for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
i = i->prev_nondebug_insn ())
{
- // no def amd use of vl
+ // no def and use of vl
if (!ignore_vl && modify_or_use_vl_p (i, info))
return false;
@@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
for (const insn_info *insn : bb->real_nondebug_insns ())
{
- if ((info.has_nonvlmax_reg_avl ()
- && find_access (insn->defs (), REGNO (info.get_avl ())))
- || (info.has_vl ()
- && find_access (insn->uses (),
- REGNO (info.get_vl ()))))
+ if (info.has_nonvlmax_reg_avl ()
+ && find_access (insn->defs (), REGNO (info.get_avl ())))
{
bitmap_clear_bit (m_transp[bb_index], i);
break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
new file mode 100644
index 0000000..76402ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
new file mode 100644
index 0000000..04539d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ for (size_t vl; len > 0; len -= vl, buf += vl) {
+ vl = __riscv_vsetvl_e8m8(len);
+ vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+ vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+ sum += __riscv_vcpop_m_b1(mask, vl);
+ }
+ return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+ vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+ vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m0, vl);
+ sum += __riscv_vcpop_m_b1(m1, vl);
+ sum += __riscv_vcpop_m_b1(m2, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */