aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-12-01 08:39:57 +0800
committerLehua Ding <lehua.ding@rivai.ai>2023-12-01 14:49:01 +0800
commit923a67f17badcbe6e2b2e5d3570a265443258c8e (patch)
treef5c70d4b6de6eeef1390b37e37a8907562744535
parent73e2bdbf9bed48b2b30691f03e79230bff4850c6 (diff)
downloadgcc-923a67f17badcbe6e2b2e5d3570a265443258c8e.zip
gcc-923a67f17badcbe6e2b2e5d3570a265443258c8e.tar.gz
gcc-923a67f17badcbe6e2b2e5d3570a265443258c8e.tar.bz2
RISC-V: Fix VSETVL PASS regression
This patch fix 2 regression (one is bug regression, the other is performance regression). Those 2 regressions are both we are comparing ratio for same AVL in wrong place. 1. BUG regression: avl_single-84.c: f0: li a5,999424 add a1,a1,a5 li a4,299008 add a5,a0,a5 addi a3,a4,992 addi a5,a5,576 addi a1,a1,576 vsetvli a4,zero,e8,m2,ta,ma add a0,a0,a3 vlm.v v1,0(a5) vsm.v v1,0(a1) vl1re64.v v1,0(a0) beq a2,zero,.L10 li a5,0 vsetvli zero,zero,e64,m1,tu,ma ---> This is totally incorrect since the ratio above is 4, wheras it is demanding ratio = 64 here. .L3: fcvt.d.lu fa5,a5 addi a5,a5,1 fadd.d fa5,fa5,fa0 vfmv.s.f v1,fa5 bne a5,a2,.L3 vfmv.f.s fa0,v1 ret .L10: vsetvli zero,zero,e64,m1,ta,ma vfmv.f.s fa0,v1 ret 2. Performance regression: before this patch: vsetvli a5,a4,e8,m1,ta,ma vsetvli zero,a5,e32,m1,tu,ma vmv.s.x v2,zero vmv.s.x v1,zero vsetvli zero,a5,e32,m4,tu,ma vle32.v v4,0(a1) vfmul.vv v4,v4,v4 vfredosum.vs v1,v4,v2 vfmv.f.s fa5,v1 fsw fa5,0(a0) sub a4,a4,a5 bne a4,zero,.L2 ret After this patch: vsetvli a5,a4,e32,m4,tu,ma vle32.v v4,0(a1) vmv.s.x v2,zero vmv.s.x v1,zero vfmul.vv v4,v4,v4 vfredosum.vs v1,v4,v2 vfmv.f.s fa5,v1 fsw fa5,0(a0) sub a4,a4,a5 bne a4,zero,.L2 ret Tested rv64gcv_zvfh_zfh passed no regression. zvl256b/zvl512b/zvl1024b/zve64d is runing. PR target/112776 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test. * gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto. * gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.
-rw-r--r--gcc/config/riscv/riscv-vsetvl.cc13
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c6
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c36
4 files changed, 46 insertions, 11 deletions
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4..1da95da 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
{
gcc_assert (prev.valid_p () && next.valid_p ());
- if (prev.get_ratio () != next.get_ratio ())
- return false;
-
if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
return false;
@@ -2188,7 +2185,7 @@ private:
return true;
}
- bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+ bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
{
gcc_assert (
!bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
{
const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
if (!prev_info.valid_p ()
- || !m_dem.avl_available_p (prev_info, curr_info))
+ || !m_dem.avl_available_p (prev_info, curr_info)
+ || prev_info.get_ratio () != curr_info.get_ratio ())
return false;
}
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
curr_info = block_info.local_infos[0];
}
if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
- && preds_has_same_avl_p (curr_info))
+ && preds_all_same_avl_and_ratio_p (curr_info))
curr_info.set_change_vtype_only ();
vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
for (auto &curr_info : block_info.local_infos)
{
if (prev_info.valid_p () && curr_info.valid_p ()
- && m_dem.avl_available_p (prev_info, curr_info))
+ && m_dem.avl_available_p (prev_info, curr_info)
+ && prev_info.get_ratio () == curr_info.get_ratio ())
curr_info.set_change_vtype_only ();
prev_info = curr_info;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd9..5cd0f28 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642..13344ec 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
}
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 0000000..8536901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+ int i, k;
+
+ vfloat32m4_t x_vec;
+ vfloat32m4_t x_forward_vec;
+ vfloat32m4_t temp_vec;
+ vfloat32m1_t dst_vec;
+ vfloat32m1_t src_vec;
+
+ float result = 0.0f;
+ float shift_prev = 0.0f;
+
+ size_t n = 64;
+ for (size_t vl; n > 0; n -= vl)
+ {
+ vl = __riscv_vsetvl_e32m4 (n);
+ x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+ src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+ dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+ dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+ r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */