aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-12-23 07:07:42 +0800
committerPan Li <pan2.li@intel.com>2023-12-23 08:59:03 +0800
commit290230034092898981488d0716ddae43bd36c09f (patch)
tree3c2273ea9a34960cb57aedbd8f867ce424da0a04 /gcc
parent0a529d196b1f54a2d1e1bf39eae8290d9cfe29ed (diff)
downloadgcc-290230034092898981488d0716ddae43bd36c09f.zip
gcc-290230034092898981488d0716ddae43bd36c09f.tar.gz
gcc-290230034092898981488d0716ddae43bd36c09f.tar.bz2
RISC-V: Make PHI initial value occupy live V_REG in dynamic LMUL cost model analysis
Consider the following case:

foo:
        ble     a0,zero,.L11
        lui     a2,%hi(.LANCHOR0)
        addi    sp,sp,-128
        addi    a2,a2,%lo(.LANCHOR0)
        mv      a1,a0
        vsetvli a6,zero,e32,m8,ta,ma
        vid.v   v8
        vs8r.v  v8,0(sp)              ---> spill
.L3:
        vl8re32.v       v16,0(sp)     ---> reload
        vsetvli a4,a1,e8,m2,ta,ma
        li      a3,0
        vsetvli a5,zero,e32,m8,ta,ma
        vmv8r.v v0,v16
        vmv.v.x v8,a4
        vmv.v.i v24,0
        vadd.vv v8,v16,v8
        vmv8r.v v16,v24
        vs8r.v  v8,0(sp)              ---> spill
.L4:
        addiw   a3,a3,1
        vadd.vv v8,v0,v16
        vadd.vi v16,v16,1
        vadd.vv v24,v24,v8
        bne     a0,a3,.L4
        vsetvli zero,a4,e32,m8,ta,ma
        sub     a1,a1,a4
        vse32.v v24,0(a2)
        slli    a4,a4,2
        add     a2,a2,a4
        bne     a1,zero,.L3
        li      a0,0
        addi    sp,sp,128
        jr      ra
.L11:
        li      a0,0
        ret

This picks an unexpected LMUL = 8.

The root cause is that we didn't include the PHI initial value in the dynamic LMUL calculation:

  # j_17 = PHI <j_11(9), 0(5)>
  --->
  # vect_vec_iv_.8_24 = PHI <_25(9), { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }(5)>

We didn't count { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } as consuming a vector register, but a vector register group does get allocated for it.

This patch fixes this missing count. After this patch we pick the ideal LMUL (LMUL = M4):

foo:
        ble     a0,zero,.L9
        lui     a4,%hi(.LANCHOR0)
        addi    a4,a4,%lo(.LANCHOR0)
        mv      a2,a0
        vsetivli        zero,16,e32,m4,ta,ma
        vid.v   v20
.L3:
        vsetvli a3,a2,e8,m1,ta,ma
        li      a5,0
        vsetivli        zero,16,e32,m4,ta,ma
        vmv4r.v v16,v20
        vmv.v.i v12,0
        vmv.v.x v4,a3
        vmv4r.v v8,v12
        vadd.vv v20,v20,v4
.L4:
        addiw   a5,a5,1
        vmv4r.v v4,v8
        vadd.vi v8,v8,1
        vadd.vv v4,v16,v4
        vadd.vv v12,v12,v4
        bne     a0,a5,.L4
        slli    a5,a3,2
        vsetvli zero,a3,e32,m4,ta,ma
        sub     a2,a2,a3
        vse32.v v12,0(a4)
        add     a4,a4,a5
        bne     a2,zero,.L3
.L9:
        li      a0,0
        ret

Tested on --with-arch=gcv, no regression.

PR target/113112

gcc/ChangeLog:
        * config/riscv/riscv-vector-costs.cc (max_number_of_live_regs): Refine dump information.
        (preferred_new_lmul_p): Make PHI initial value into live regs calculation.
gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/riscv-vector-costs.cc45
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c31
2 files changed, 71 insertions, 5 deletions
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index a316603..946eb4a 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -355,10 +355,11 @@ max_number_of_live_regs (const basic_block bb,
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Maximum lmul = %d, %d number of live V_REG at program "
- "point %d for bb %d\n",
- lmul, max_nregs, live_point, bb->index);
+ dump_printf_loc (
+ MSG_NOTE, vect_location,
+ "Maximum lmul = %d, At most %d number of live V_REG at program "
+ "point %d for bb %d\n",
+ lmul, max_nregs, live_point, bb->index);
return max_nregs;
}
@@ -472,6 +473,41 @@ update_local_live_ranges (
tree def = gimple_phi_arg_def (phi, j);
auto *live_ranges = live_ranges_per_bb.get (bb);
auto *live_range = live_ranges->get (def);
+ if (poly_int_tree_p (def))
+ {
+ /* Insert live range of INTEGER_CST or POLY_CST since we will
+ need to allocate a vector register for it.
+
+ E.g. # j_17 = PHI <j_11(9), 0(5)> will be transformed
+ into # vect_vec_iv_.8_24 = PHI <_25(9), { 0, ... }(5)>
+
+ The live range for such value is short which only lives
+ from program point 0 to 1. */
+ if (live_range)
+ {
+ unsigned int start = (*live_range).first;
+ (*live_range).first = 0;
+ if (dump_enabled_p ())
+ dump_printf_loc (
+ MSG_NOTE, vect_location,
+ "Update %T start point from %d to 0:\n", def, start);
+ }
+ else
+ {
+ live_ranges->put (def, pair (0, 1));
+ auto &program_points = (*program_points_per_bb.get (bb));
+ if (program_points.is_empty ())
+ {
+ stmt_point info = {1, phi};
+ program_points.safe_push (info);
+ }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Add %T start point from 0 to 1:\n",
+ def);
+ }
+ continue;
+ }
if (live_range && flow_bb_inside_loop_p (loop, e->src))
{
unsigned int start = (*live_range).first;
@@ -580,7 +616,6 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
biggest_mode, lmul);
if (nregs > max_nregs)
max_nregs = nregs;
- live_ranges_per_bb.empty ();
}
live_ranges_per_bb.empty ();
return max_nregs > V_REG_NUM;
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c
new file mode 100644
index 0000000..a44a1c0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=fixed-vlmax -fdump-tree-vect-details" } */
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (int n){
+ int i,j;
+ int sum,x;
+
+ for (i = 0; i < n; i++) {
+ sum = 0;
+ for (j = 0; j < n; j++) {
+ sum += (i + j);
+ }
+ a[i] = sum;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-times {ret} 1 } } */
+/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 4" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 40 number of live V_REG at program point 0 for bb 3" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 5" "vect" } } */