RISC-V: Inhibit FP <--> int register moves via tune param

Under extreme register pressure, compiler can use FP <--> int moves as a cheap alternate to spilling to memory. This was seen with SPEC2017 FP benchmark 507.cactu: ML_BSSN_Advect.cc:ML_BSSN_Advect_Body() | fmv.d.x fa5,s9 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1 | .LVL325: | ld s9,184(sp) # _12469, %sfp | ... | .LVL339: | fmv.x.d s4,fa5 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1 | The FMV instructions could be costlier (than stack spill) on certain micro-architectures, thus this needs to be a per-cpu tunable (default being to inhibit on all existing RV cpus). Testsuite run with new test reports 10 failures without the fix corresponding to the build variations of pr105666.c | === gcc Summary === | | # of expected passes 123318 (+10) | # of unexpected failures 34 (-10) | # of unexpected successes 4 | # of expected failures 780 | # of unresolved testcases 4 | # of unsupported tests 2796 gcc/ChangeLog: * config/riscv/riscv.cc: (struct riscv_tune_param): Add fmv_cost. (rocket_tune_info): Add default fmv_cost 8. (sifive_7_tune_info): Ditto. (thead_c906_tune_info): Ditto. (optimize_size_tune_info): Ditto. (riscv_register_move_cost): Use fmv_cost for int<->fp moves. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr105666.c: New test. Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
author: Vineet Gupta <vineetg@rivosinc.com> 2022-05-23 11:12:09 -0700
committer: Kito Cheng <kito.cheng@sifive.com> 2022-05-24 15:55:17 +0800
commit: b646d7d279ae0c0d35564542d09866bf3e8afac0 (patch)
tree: c60dfdaef8bc6ee7ccfaa38a818f02d88efa97e8
parent: b43836914bdc2a37563cf31359b2c4803bfe4374 (diff)
download: gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.zip
gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.tar.gz
gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.tar.bz2
2 files changed, 64 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index fb66214..f83dc79 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -220,6 +220,7 @@ struct riscv_tune_param
   unsigned short issue_rate;
   unsigned short branch_cost;
   unsigned short memory_cost;
+  unsigned short fmv_cost;
   bool slow_unaligned_access;
 };
 
@@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   1,						/* issue_rate */
   3,						/* branch_cost */
   5,						/* memory_cost */
+  8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
 };
 
@@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   2,						/* issue_rate */
   4,						/* branch_cost */
   3,						/* memory_cost */
+  8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
 };
 
@@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   1,            /* issue_rate */
   3,            /* branch_cost */
   5,            /* memory_cost */
+  8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
 };
 
@@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   1,						/* issue_rate */
   1,						/* branch_cost */
   2,						/* memory_cost */
+  8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
 };
 
@@ -4743,6 +4748,10 @@ static int
 riscv_register_move_cost (machine_mode mode,
 			  reg_class_t from, reg_class_t to)
 {
+  if ((from == FP_REGS && to == GR_REGS) ||
+      (from == GR_REGS && to == FP_REGS))
+    return tune_param->fmv_cost;
+
   return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr105666.c b/gcc/testsuite/gcc.target/riscv/pr105666.c
new file mode 100644
index 0000000..904f3bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr105666.c
@@ -0,0 +1,55 @@
+/* Shamelessly plugged off gcc/testsuite/gcc.c-torture/execute/pr28982a.c.  
+
+   The idea is to induce high register pressure for both int/fp registers
+   so that they spill. By default FMV instructions would be used to stash
+   int reg to a fp reg (and vice-versa) but that could be costlier than
+   spilling to stack.  */
+
+/* { dg-do compile } */
+/* { dg-options "-march=rv64g -ffast-math" } */
+
+#define NITER 4
+#define NVARS 20
+#define MULTI(X) \
+  X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
+  X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
+
+#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
+#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 5
+#define LOOP(INDEX) result##INDEX += result##INDEX * (*ptr##INDEX), ptr##INDEX += inc##INDEX
+#define COPYOUT(INDEX) results[INDEX] = result##INDEX
+
+double *ptrs[NVARS];
+double results[NVARS];
+int incs[NVARS];
+
+void __attribute__((noinline))
+foo (int n)
+{
+  int MULTI (DECLAREI);
+  double MULTI (DECLAREF);
+  while (n--)
+    MULTI (LOOP);
+  MULTI (COPYOUT);
+}
+
+double input[NITER * NVARS];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < NVARS; i++)
+    ptrs[i] = input + i, incs[i] = i;
+  for (i = 0; i < NITER * NVARS; i++)
+    input[i] = i;
+  foo (NITER);
+  for (i = 0; i < NVARS; i++)
+    if (results[i] != i * NITER * (NITER + 1) / 2)
+      return 1;
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "\tfmv\\.d\\.x\t" } } */
+/* { dg-final { scan-assembler-not "\tfmv\\.x\\.d\t" } } */
author	Vineet Gupta <vineetg@rivosinc.com>	2022-05-23 11:12:09 -0700
committer	Kito Cheng <kito.cheng@sifive.com>	2022-05-24 15:55:17 +0800
commit	b646d7d279ae0c0d35564542d09866bf3e8afac0 (patch)
tree	c60dfdaef8bc6ee7ccfaa38a818f02d88efa97e8
parent	b43836914bdc2a37563cf31359b2c4803bfe4374 (diff)
download	gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.zip gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.tar.gz gcc-b646d7d279ae0c0d35564542d09866bf3e8afac0.tar.bz2