author     Vineet Gupta <vineetg@rivosinc.com>    2022-05-23 11:12:09 -0700
committer  Kito Cheng <kito.cheng@sifive.com>     2022-05-24 15:55:17 +0800
commit     b646d7d279ae0c0d35564542d09866bf3e8afac0 (patch)
tree       c60dfdaef8bc6ee7ccfaa38a818f02d88efa97e8 /gcc
parent     b43836914bdc2a37563cf31359b2c4803bfe4374 (diff)
RISC-V: Inhibit FP <--> int register moves via tune param
Under extreme register pressure, the compiler can use FP <--> int
register moves as a cheap alternative to spilling to memory.
This was seen with SPEC2017 FP benchmark 507.cactu:
ML_BSSN_Advect.cc:ML_BSSN_Advect_Body()
| fmv.d.x fa5,s9 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
| .LVL325:
| ld s9,184(sp) # _12469, %sfp
| ...
| .LVL339:
| fmv.x.d s4,fa5 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
|
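For contrast, a rough sketch of the spill-based sequence the patch steers the
allocator toward instead (the stack slot <slot> is illustrative only, not taken
from the generated code):
| sd      s9,<slot>(sp)  # spill the GPR value instead of parking it in fa5
| ld      s9,184(sp)     # _12469, %sfp
| ...
| ld      s4,<slot>(sp)  # reload where fmv.x.d previously produced s4
|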
The FMV instructions can be costlier than a stack spill on certain
micro-architectures, so this needs to be a per-CPU tunable (the default
being to inhibit FMV-based stashing on all existing RISC-V CPUs).
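To make the trade-off concrete, here is a small standalone sketch (not GCC
code; the real decision is made by IRA/LRA through the target's register and
memory move cost hooks and is considerably more involved) of how the new
fmv_cost parameter tips the balance.  The memory_cost/fmv_cost numbers mirror
the rocket_tune_info values from the patch; the "cheap_fmv" entry is a
hypothetical core that would keep using FMV:

  #include <stdio.h>
  #include <stdbool.h>

  /* Simplified model: the allocator weighs the cost of a cross-class
     (int <-> FP) register move against the cost of going through memory.
     With the patch, the cross-class move is priced at fmv_cost instead of
     a flat 2, so on current cores the spill wins.  */
  struct tune_model {
    unsigned short memory_cost; /* spill/reload cost          */
    unsigned short fmv_cost;    /* int <-> FP register move   */
  };

  static bool prefer_spill (const struct tune_model *t)
  {
    /* Spill when the cross-class move is pricier than memory.  */
    return t->fmv_cost > t->memory_cost;
  }

  int main (void)
  {
    struct tune_model rocket_like = { 5, 8 }; /* values from the patch  */
    struct tune_model cheap_fmv   = { 5, 2 }; /* hypothetical core      */

    printf ("rocket-like: %s\n", prefer_spill (&rocket_like) ? "spill" : "fmv");
    printf ("cheap-fmv:   %s\n", prefer_spill (&cheap_fmv)   ? "spill" : "fmv");
    return 0;
  }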
A testsuite run with the new test reports 10 failures without the fix,
corresponding to the build variations of pr105666.c:
| === gcc Summary ===
|
| # of expected passes 123318 (+10)
| # of unexpected failures 34 (-10)
| # of unexpected successes 4
| # of expected failures 780
| # of unresolved testcases 4
| # of unsupported tests 2796
gcc/ChangeLog:
* config/riscv/riscv.cc (struct riscv_tune_param): Add
fmv_cost.
(rocket_tune_info): Add default fmv_cost 8.
(sifive_7_tune_info): Ditto.
(thead_c906_tune_info): Ditto.
(optimize_size_tune_info): Ditto.
(riscv_register_move_cost): Use fmv_cost for int<->fp moves.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105666.c: New test.
Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
Diffstat (limited to 'gcc')
-rw-r--r--   gcc/config/riscv/riscv.cc                    9
-rw-r--r--   gcc/testsuite/gcc.target/riscv/pr105666.c   55
2 files changed, 64 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index fb66214..f83dc79 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -220,6 +220,7 @@ struct riscv_tune_param
   unsigned short issue_rate;
   unsigned short branch_cost;
   unsigned short memory_cost;
+  unsigned short fmv_cost;
   bool slow_unaligned_access;
 };
@@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   1,     /* issue_rate */
   3,     /* branch_cost */
   5,     /* memory_cost */
+  8,     /* fmv_cost */
   true,  /* slow_unaligned_access */
 };
@@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   2,     /* issue_rate */
   4,     /* branch_cost */
   3,     /* memory_cost */
+  8,     /* fmv_cost */
   true,  /* slow_unaligned_access */
 };
@@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   1,     /* issue_rate */
   3,     /* branch_cost */
   5,     /* memory_cost */
+  8,     /* fmv_cost */
   false, /* slow_unaligned_access */
 };
@@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   1,     /* issue_rate */
   1,     /* branch_cost */
   2,     /* memory_cost */
+  8,     /* fmv_cost */
   false, /* slow_unaligned_access */
 };
@@ -4743,6 +4748,10 @@ static int
 riscv_register_move_cost (machine_mode mode,
                           reg_class_t from, reg_class_t to)
 {
+  if ((from == FP_REGS && to == GR_REGS) ||
+      (from == GR_REGS && to == FP_REGS))
+    return tune_param->fmv_cost;
+
   return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
 }
diff --git a/gcc/testsuite/gcc.target/riscv/pr105666.c b/gcc/testsuite/gcc.target/riscv/pr105666.c
new file mode 100644
index 0000000..904f3bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr105666.c
@@ -0,0 +1,55 @@
+/* Shamelessly plugged off gcc/testsuite/gcc.c-torture/execute/pr28982a.c.
+
+   The idea is to induce high register pressure for both int/fp registers
+   so that they spill.  By default FMV instructions would be used to stash
+   int reg to a fp reg (and vice-versa) but that could be costlier than
+   spilling to stack.  */
+
+/* { dg-do compile } */
+/* { dg-options "-march=rv64g -ffast-math" } */
+
+#define NITER 4
+#define NVARS 20
+#define MULTI(X) \
+  X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
+  X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
+
+#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
+#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 5
+#define LOOP(INDEX) result##INDEX += result##INDEX * (*ptr##INDEX), ptr##INDEX += inc##INDEX
+#define COPYOUT(INDEX) results[INDEX] = result##INDEX
+
+double *ptrs[NVARS];
+double results[NVARS];
+int incs[NVARS];
+
+void __attribute__((noinline))
+foo (int n)
+{
+  int MULTI (DECLAREI);
+  double MULTI (DECLAREF);
+  while (n--)
+    MULTI (LOOP);
+  MULTI (COPYOUT);
+}
+
+double input[NITER * NVARS];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < NVARS; i++)
+    ptrs[i] = input + i, incs[i] = i;
+  for (i = 0; i < NITER * NVARS; i++)
+    input[i] = i;
+  foo (NITER);
+  for (i = 0; i < NVARS; i++)
+    if (results[i] != i * NITER * (NITER + 1) / 2)
+      return 1;
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "\tfmv\\.d\\.x\t" } } */
+/* { dg-final { scan-assembler-not "\tfmv\\.x\\.d\t" } } */
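
For readers skimming the new test, the MULTI macro is what creates the
register pressure: `int MULTI (DECLAREI);` expands to a single declaration of
twenty live integer increments, and `double MULTI (DECLAREF);` to twenty
pointer/accumulator pairs, roughly (abbreviated; the real expansion spells out
all twenty indices):

  int inc0 = incs[0], inc1 = incs[1], /* ... */ inc19 = incs[19];
  double *ptr0 = ptrs[0], result0 = 5, /* ... */ *ptr19 = ptrs[19], result19 = 5;

With around forty values live across the loop, the allocator exhausts both
GPRs and FPRs, which is exactly the situation where it previously reached for
fmv.d.x/fmv.x.d; the dg-final directives check that it no longer does.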