aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdwin Lu <ewlu@rivosinc.com>2025-06-10 13:26:42 -0700
committerEdwin Lu <ewlu@rivosinc.com>2025-06-11 11:07:20 -0700
commit1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7 (patch)
tree29182d7eb49951ccc0e56628f1741773c6822c4a
parent76cbd678d123ed93f99c4c52456bc14290f19b7f (diff)
downloadgcc-1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7.zip
gcc-1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7.tar.gz
gcc-1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7.tar.bz2
RISC-V: Prevent speculative vsetvl insn scheduling
The instruction scheduler appears to be speculatively hoisting vsetvl insns outside of their basic block without checking for data dependencies. This resulted in a situation where the following occurs vsetvli a5,a1,e32,m1,tu,ma vle32.v v2,0(a0) sub a1,a1,a5 <-- a1 potentially set to 0 sh2add a0,a5,a0 vfmacc.vv v1,v2,v2 vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0 beq a1,zero,.L12 <-- check if avl is 0 This patch would essentially delay the vsetvl update to after the branch to prevent unnecessarily updating the vinfo at the end of a basic block. PR/117974 gcc/ChangeLog: * config/riscv/riscv.cc (struct riscv_tune_param): Add tune param. (riscv_sched_can_speculate_insn): Implement. (TARGET_SCHED_CAN_SPECULATE_INSN): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test. Signed-off-by: Edwin Lu <ewlu@rivosinc.com>
-rw-r--r--gcc/config/riscv/riscv.cc35
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c19
2 files changed, 54 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0c99850..c6e0ffb 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -302,6 +302,7 @@ struct riscv_tune_param
bool vector_unaligned_access;
bool use_divmod_expansion;
bool overlap_op_by_pieces;
+ bool speculative_sched_vsetvl;
unsigned int fusible_ops;
const struct cpu_vector_cost *vec_costs;
const char *function_align;
@@ -464,6 +465,7 @@ static const struct riscv_tune_param rocket_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -486,6 +488,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -508,6 +511,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -530,6 +534,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -552,6 +557,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -574,6 +580,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -596,6 +603,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
true, /* vector_unaligned_access */
false, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -618,6 +626,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
true, /* vector_unaligned_access */
true, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -640,6 +649,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -662,6 +672,7 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
false, /* vector_unaligned_access */
true, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -10441,6 +10452,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
return new_cost;
}
+/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can
+ can be scheduled for speculative execution. Reject vsetvl instructions to
+ prevent the scheduler from hoisting them out of basic blocks without
+ checking for data dependencies PR117974. */
+static bool
+riscv_sched_can_speculate_insn (rtx_insn *insn)
+{
+ /* Gate speculative scheduling of vsetvl instructions behind tune param. */
+ if (tune_param->speculative_sched_vsetvl)
+ return true;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_VSETVL:
+ case TYPE_VSETVL_PRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
/* Auxiliary function to emit RISC-V ELF attribute. */
static void
riscv_emit_attribute ()
@@ -14745,6 +14777,9 @@ synthesize_and (rtx operands[3])
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
+#undef TARGET_SCHED_CAN_SPECULATE_INSN
+#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
+
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
new file mode 100644
index 0000000..bf99240
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -Ofast" } */
+
+float g(float q[], int N){
+ float dqnorm = 0.0;
+
+ #pragma GCC unroll 4
+
+ for (int i=0; i < N; i++) {
+ dqnorm = dqnorm + q[i] * q[i];
+ }
+ return dqnorm;
+}
+
+/* need slightly different test for when -funroll-loops is enabled to keep
+ test output stable. Otherwise test may be flakey. */
+/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 { target { no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,[a-x0-9]+,.L12\s+vsetvli} 3 { target { any-opts "-funroll-loops" } } } } */
+