From 3dd6f73a231362a67ff6dee05510bb4fe6253820 Mon Sep 17 00:00:00 2001 From: "demin.han" Date: Wed, 20 Dec 2023 16:15:37 +0800 Subject: RISC-V: Fix calculation of max live vregs For the stmt _1 = _2 + _3, assume that _2 or _3 is not used after this stmt. _1 can use the same register as _2 or _3 if there is no early clobber. Only two registers are needed, but the current calculation yields three. This patch reserves point 0 for bb entry and excludes the def when calculating the live regs of a given point. Signed-off-by: demin.han gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (max_number_of_live_regs): Fix max live vregs calc (preferred_new_lmul_p): Ditto gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Moved to... * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: ...here. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c: Moved to... * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: ...here. Signed-off-by: demin.han --- gcc/config/riscv/riscv-vector-costs.cc | 10 ++--- .../vect/costmodel/riscv/rvv/dynamic-lmul2-7.c | 26 ------------ .../vect/costmodel/riscv/rvv/dynamic-lmul4-10.c | 26 ++++++++++++ .../vect/costmodel/riscv/rvv/dynamic-lmul4-4.c | 47 ---------------------- .../vect/costmodel/riscv/rvv/dynamic-lmul8-11.c | 47 ++++++++++++++++++++++ 5 files changed, 78 insertions(+), 78 deletions(-) delete mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c delete mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index e7bc9ed..a316603 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -123,7 +123,7 @@ compute_local_program_points ( /* Collect the stmts that is vectorized and 
mark their program point. */ for (i = 0; i < nbbs; i++) { - int point = 0; + int point = 1; basic_block bb = bbs[i]; vec program_points = vNULL; if (dump_enabled_p ()) @@ -300,13 +300,13 @@ max_number_of_live_regs (const basic_block bb, unsigned int i; unsigned int live_point = 0; auto_vec live_vars_vec; - live_vars_vec.safe_grow_cleared (max_point + 1, true); + live_vars_vec.safe_grow_cleared (max_point, true); for (hash_map::iterator iter = live_ranges.begin (); iter != live_ranges.end (); ++iter) { tree var = (*iter).first; pair live_range = (*iter).second; - for (i = live_range.first; i <= live_range.second; i++) + for (i = live_range.first + 1; i <= live_range.second; i++) { machine_mode mode = TYPE_MODE (TREE_TYPE (var)); unsigned int nregs @@ -485,7 +485,7 @@ update_local_live_ranges ( if (!program_points_per_bb.get (e->src)) continue; unsigned int max_point - = (*program_points_per_bb.get (e->src)).length () - 1; + = (*program_points_per_bb.get (e->src)).length (); live_range = live_ranges->get (def); if (!live_range) continue; @@ -571,7 +571,7 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo) { basic_block bb = (*iter).first; unsigned int max_point - = (*program_points_per_bb.get (bb)).length () - 1; + = (*program_points_per_bb.get (bb)).length () + 1; if ((*iter).second.is_empty ()) continue; /* We prefer larger LMUL unless it causes register spillings. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c deleted file mode 100644 index 636332d..0000000 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c +++ /dev/null @@ -1,26 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */ - -int -bar (int *x, int a, int b, int n) -{ - x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); - int sum1 = 0; - int sum2 = 0; - for (int i = 0; i < n; ++i) - { - sum1 += x[2*i] - a; - sum1 += x[2*i+1] * b; - sum2 += x[2*i] - b; - sum2 += x[2*i+1] * a; - } - return sum1 + sum2; -} - -/* { dg-final { scan-assembler {e32,m2} } } */ -/* { dg-final { scan-assembler-not {jr} } } */ -/* { dg-final { scan-assembler-times {ret} 2 } } * -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ -/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c new file mode 100644 index 0000000..74e6291 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */ + +int +bar (int *x, int a, int b, int n) +{ + x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); + int sum1 = 0; + int sum2 = 0; + for (int i = 0; i < n; ++i) + { + sum1 += x[2*i] - a; + sum1 += x[2*i+1] * b; + sum2 += x[2*i] - b; + sum2 += x[2*i+1] * a; + } + return sum1 + sum2; +} + +/* { dg-final { scan-assembler {e32,m4} } } */ +/* { dg-final { scan-assembler-not 
{jr} } } */ +/* { dg-final { scan-assembler-times {ret} 2 } } * +/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c deleted file mode 100644 index 01a359b..0000000 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */ - -#include - -void foo2 (int64_t *__restrict a, - int32_t *__restrict b, - int32_t *__restrict c, - int32_t *__restrict a2, - int32_t *__restrict b2, - int32_t *__restrict c2, - int32_t *__restrict a3, - int32_t *__restrict b3, - int32_t *__restrict c3, - int32_t *__restrict a4, - int32_t *__restrict b4, - int32_t *__restrict c4, - int64_t *__restrict a5, - int32_t *__restrict b5, - int32_t *__restrict c5, - int n) -{ - for (int i = 0; i < n; i++){ - a[i] = b[i] + c[i]; - b5[i] = b[i] + c[i]; - a2[i] = b2[i] + c2[i]; - a3[i] = b3[i] + c3[i]; - a4[i] = b4[i] + c4[i]; - a5[i] = a[i] + a4[i]; - a[i] = a5[i] + b5[i]+ a[i]; - - a[i] = a[i] + c[i]; - b5[i] = a[i] + c[i]; - a2[i] = a[i] + c2[i]; - a3[i] = a[i] + c3[i]; - a4[i] = a[i] + c4[i]; - a5[i] = a[i] + a4[i]; - a[i] = a[i] + b5[i]+ a[i]; - } -} - -/* { dg-final { scan-assembler {e64,m4} } } */ -/* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ -/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ -/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c new file mode 100644 index 0000000..01c976d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */ + +#include + +void foo2 (int64_t *__restrict a, + int32_t *__restrict b, + int32_t *__restrict c, + int32_t *__restrict a2, + int32_t *__restrict b2, + int32_t *__restrict c2, + int32_t *__restrict a3, + int32_t *__restrict b3, + int32_t *__restrict c3, + int32_t *__restrict a4, + int32_t *__restrict b4, + int32_t *__restrict c4, + int64_t *__restrict a5, + int32_t *__restrict b5, + int32_t *__restrict c5, + int n) +{ + for (int i = 0; i < n; i++){ + a[i] = b[i] + c[i]; + b5[i] = b[i] + c[i]; + a2[i] = b2[i] + c2[i]; + a3[i] = b3[i] + c3[i]; + a4[i] = b4[i] + c4[i]; + a5[i] = a[i] + a4[i]; + a[i] = a5[i] + b5[i]+ a[i]; + + a[i] = a[i] + c[i]; + b5[i] = a[i] + c[i]; + a2[i] = a[i] + c2[i]; + a3[i] = a[i] + c3[i]; + a4[i] = a[i] + c4[i]; + a5[i] = a[i] + a4[i]; + a[i] = a[i] + b5[i]+ a[i]; + } +} + +/* { dg-final { scan-assembler {e64,m8} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ -- cgit v1.1