diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-12-23 07:07:42 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-12-23 08:59:03 +0800 |
commit | 290230034092898981488d0716ddae43bd36c09f (patch) | |
tree | 3c2273ea9a34960cb57aedbd8f867ce424da0a04 /gcc | |
parent | 0a529d196b1f54a2d1e1bf39eae8290d9cfe29ed (diff) | |
download | gcc-290230034092898981488d0716ddae43bd36c09f.zip gcc-290230034092898981488d0716ddae43bd36c09f.tar.gz gcc-290230034092898981488d0716ddae43bd36c09f.tar.bz2 |
RISC-V: Make PHI initial value occupy live V_REG in dynamic LMUL cost model analysis
Consider the following case:
foo:
ble a0,zero,.L11
lui a2,%hi(.LANCHOR0)
addi sp,sp,-128
addi a2,a2,%lo(.LANCHOR0)
mv a1,a0
vsetvli a6,zero,e32,m8,ta,ma
vid.v v8
vs8r.v v8,0(sp) ---> spill
.L3:
vl8re32.v v16,0(sp) ---> reload
vsetvli a4,a1,e8,m2,ta,ma
li a3,0
vsetvli a5,zero,e32,m8,ta,ma
vmv8r.v v0,v16
vmv.v.x v8,a4
vmv.v.i v24,0
vadd.vv v8,v16,v8
vmv8r.v v16,v24
vs8r.v v8,0(sp) ---> spill
.L4:
addiw a3,a3,1
vadd.vv v8,v0,v16
vadd.vi v16,v16,1
vadd.vv v24,v24,v8
bne a0,a3,.L4
vsetvli zero,a4,e32,m8,ta,ma
sub a1,a1,a4
vse32.v v24,0(a2)
slli a4,a4,2
add a2,a2,a4
bne a1,zero,.L3
li a0,0
addi sp,sp,128
jr ra
.L11:
li a0,0
ret
The cost model unexpectedly picks LMUL = 8.
The root cause is that we didn't include the PHI initial value in the dynamic LMUL calculation:
# j_17 = PHI <j_11(9), 0(5)> ---> # vect_vec_iv_.8_24 = PHI <_25(9), { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }(5)>
We didn't count { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } as consuming a vector register, but a vector register group is in fact allocated for it.
This patch adds the missing count. After this patch we pick the ideal LMUL (LMUL = M4):
foo:
ble a0,zero,.L9
lui a4,%hi(.LANCHOR0)
addi a4,a4,%lo(.LANCHOR0)
mv a2,a0
vsetivli zero,16,e32,m4,ta,ma
vid.v v20
.L3:
vsetvli a3,a2,e8,m1,ta,ma
li a5,0
vsetivli zero,16,e32,m4,ta,ma
vmv4r.v v16,v20
vmv.v.i v12,0
vmv.v.x v4,a3
vmv4r.v v8,v12
vadd.vv v20,v20,v4
.L4:
addiw a5,a5,1
vmv4r.v v4,v8
vadd.vi v8,v8,1
vadd.vv v4,v16,v4
vadd.vv v12,v12,v4
bne a0,a5,.L4
slli a5,a3,2
vsetvli zero,a3,e32,m4,ta,ma
sub a2,a2,a3
vse32.v v12,0(a4)
add a4,a4,a5
bne a2,zero,.L3
.L9:
li a0,0
ret
Tested with --with-arch=gcv; no regressions.
PR target/113112
gcc/ChangeLog:
* config/riscv/riscv-vector-costs.cc (max_number_of_live_regs): Refine dump information.
(preferred_new_lmul_p): Make PHI initial value into live regs calculation.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/riscv/riscv-vector-costs.cc | 45 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c | 31 |
2 files changed, 71 insertions, 5 deletions
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index a316603..946eb4a 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -355,10 +355,11 @@ max_number_of_live_regs (const basic_block bb, } if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Maximum lmul = %d, %d number of live V_REG at program " - "point %d for bb %d\n", - lmul, max_nregs, live_point, bb->index); + dump_printf_loc ( + MSG_NOTE, vect_location, + "Maximum lmul = %d, At most %d number of live V_REG at program " + "point %d for bb %d\n", + lmul, max_nregs, live_point, bb->index); return max_nregs; } @@ -472,6 +473,41 @@ update_local_live_ranges ( tree def = gimple_phi_arg_def (phi, j); auto *live_ranges = live_ranges_per_bb.get (bb); auto *live_range = live_ranges->get (def); + if (poly_int_tree_p (def)) + { + /* Insert live range of INTEGER_CST or POLY_CST since we will + need to allocate a vector register for it. + + E.g. # j_17 = PHI <j_11(9), 0(5)> will be transformed + into # vect_vec_iv_.8_24 = PHI <_25(9), { 0, ... }(5)> + + The live range for such value is short which only lives + from program point 0 to 1. 
*/ + if (live_range) + { + unsigned int start = (*live_range).first; + (*live_range).first = 0; + if (dump_enabled_p ()) + dump_printf_loc ( + MSG_NOTE, vect_location, + "Update %T start point from %d to 0:\n", def, start); + } + else + { + live_ranges->put (def, pair (0, 1)); + auto &program_points = (*program_points_per_bb.get (bb)); + if (program_points.is_empty ()) + { + stmt_point info = {1, phi}; + program_points.safe_push (info); + } + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Add %T start point from 0 to 1:\n", + def); + } + continue; + } if (live_range && flow_bb_inside_loop_p (loop, e->src)) { unsigned int start = (*live_range).first; @@ -580,7 +616,6 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo) biggest_mode, lmul); if (nregs > max_nregs) max_nregs = nregs; - live_ranges_per_bb.empty (); } live_ranges_per_bb.empty (); return max_nregs > V_REG_NUM; diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c new file mode 100644 index 0000000..a44a1c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=fixed-vlmax -fdump-tree-vect-details" } */ + +#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (int n){ + int i,j; + int sum,x; + + for (i = 0; i < n; i++) { + sum = 0; + for (j = 0; j < n; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-not {jr} } } */ +/* { dg-final { scan-assembler-times {ret} 1 } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ +/* { dg-final { 
scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 4" "vect" } } */ +/* { dg-final { scan-tree-dump "At most 40 number of live V_REG at program point 0 for bb 3" "vect" } } */ +/* { dg-final { scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 5" "vect" } } */ |