author     Juzhe-Zhong <juzhe.zhong@rivai.ai>  2024-01-04 16:22:48 +0800
committer  Lehua Ding <lehua.ding@rivai.ai>    2024-01-04 17:20:20 +0800
commit     b1342247a44c410ad6a44dfd82813fafe2ea7c1d (patch)
tree       66933ab506e46d5909fece8ff2ab782eace777c9
parent     73a4f67b9c8c497d87fda44160953293bc4e11e5 (diff)
RISC-V: Make liveness estimation be aware of .vi variant
Consider the following case:

  void
  f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
  {
    for (int i = 0; i < n; i++)
      {
        int tmp = b[i] + 15;
        int tmp2 = tmp + b[i];
        c[i] = tmp2 + b[i];
        d[i] = tmp + tmp2 + b[i];
      }
  }

The current dynamic LMUL cost model chooses LMUL = 4 because it counts the
"15" as consuming one vector register group, which is not accurate.  This
patch teaches the dynamic LMUL cost model about the potential .vi variant
instruction transformation, so that it can choose LMUL = 8 according to the
more accurate cost model.

After this patch:

  f:
          ble     a4,zero,.L5
  .L3:
          vsetvli a5,a4,e32,m8,ta,ma
          slli    a0,a5,2
          vle32.v v16,0(a1)
          vadd.vi v24,v16,15
          vadd.vv v8,v24,v16
          vadd.vv v0,v8,v16
          vse32.v v0,0(a2)
          vadd.vv v8,v8,v24
          vadd.vv v8,v8,v16
          vse32.v v8,0(a3)
          add     a1,a1,a0
          add     a2,a2,a0
          add     a3,a3,a0
          sub     a4,a4,a5
          bne     a4,zero,.L3
  .L5:
          ret

Tested on both RV32 and RV64 with no regressions.  OK for trunk?

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (variable_vectorized_p):
	Teach vi variant.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c: New test.
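The intuition behind the LMUL flip can be sketched in a few lines.  The
following is a simplified model, not GCC's actual heuristic (choose_lmul
and the fits-in-32-registers rule are illustrative assumptions): counting
the folded constant "15" as a live vector value pushes the peak from 4 to
5 register groups, which no longer fits at LMUL = 8.

  /* Simplified sketch, not GCC's implementation: RVV has 32 vector
     registers; at LMUL = k each value occupies a group of k registers.
     Pick the largest LMUL whose register groups still fit the peak
     number of simultaneously live vector values.  */
  static int
  choose_lmul (int max_live_vregs)
  {
    for (int lmul = 8; lmul > 1; lmul /= 2)
      if (max_live_vregs * lmul <= 32)
        return lmul;
    return 1;
  }

  /* With the constant 15 folded into vadd.vi, the loop above peaks at
     4 live values (v16, v24, v8, v0): choose_lmul (4) == 8.  Counting
     the constant as a 5th value gives choose_lmul (5) == 4.  */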
-rw-r--r--  gcc/config/riscv/riscv-vector-costs.cc                            30
-rw-r--r--  gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c  74
2 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 21f8a81..e443503 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -255,6 +255,29 @@ variable_vectorized_p (stmt_vec_info stmt_info, tree var, bool lhs_p)
return false;
}
}
+ else if (is_gimple_assign (stmt))
+ {
+ tree_code tcode = gimple_assign_rhs_code (stmt);
+	/* The .vi variant doesn't need a vector register for such an
+	   operand.  E.g. tmp_15 = _4 + 1; will be transformed into
+	   vadd.vi, so the INTEGER_CST '1' doesn't need a vector
+	   register.  */
+ switch (tcode)
+ {
+ case PLUS_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_AND_EXPR:
+ return TREE_CODE (var) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (var), -16, 15);
+ case MINUS_EXPR:
+ return TREE_CODE (var) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (var), -16, 15)
+ || gimple_assign_rhs1 (stmt) != var;
+ default:
+ break;
+ }
+ }
+
if (lhs_p)
return is_gimple_reg (var)
&& (!POINTER_TYPE_P (TREE_TYPE (var))
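For reference, a minimal standalone sketch of the immediate-range rule the
hunk above relies on (fits_vi_immediate is a hypothetical helper, not a GCC
function): RVV .vi instruction forms encode a 5-bit signed immediate, hence
the IN_RANGE (..., -16, 15) check.

  #include <stdbool.h>

  /* Hypothetical standalone helper, not GCC code: RVV .vi forms
     encode a 5-bit signed immediate, so only constants in [-16, 15]
     can be folded into the instruction and avoid a register.  */
  static bool
  fits_vi_immediate (long long imm)
  {
    return imm >= -16 && imm <= 15;
  }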
@@ -331,13 +354,6 @@ compute_local_live_ranges (
for (i = 0; i < gimple_num_args (stmt); i++)
{
tree var = gimple_arg (stmt, i);
- /* Both IMM and REG are included since a VECTOR_CST may be
- potentially held in a vector register. However, it's not
- accurate, since a PLUS_EXPR can be vectorized into vadd.vi
- if IMM is -16 ~ 15.
-
- TODO: We may elide the cases that the unnecessary IMM in
- the future. */
if (variable_vectorized_p (program_point.stmt_info, var,
false))
{
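Why MINUS_EXPR additionally checks the operand position: RVV provides
vrsub.vi (vd = imm - vs2) but no vsub.vi, and GIMPLE normally canonicalizes
x - C into x + (-C), so only the "imm - x" shape reaches this code with a
constant operand.  A hypothetical illustration (it mirrors f2 in the new
test; the comments describe the expected lowering, not GCC internals):

  /* Only the "imm - x" shape lets the constant be folded into the
     instruction, via vrsub.vi; "x - imm" is canonicalized to
     x + (-imm) and handled by the PLUS_EXPR case (vadd.vi).  */
  void
  sub_shapes (int *restrict a, int *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      {
        a[i] = 15 - b[i];  /* MINUS_EXPR, constant rhs1 -> vrsub.vi.  */
        b[i] = b[i] - 15;  /* Becomes PLUS_EXPR with -15 -> vadd.vi.  */
      }
  }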
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
new file mode 100644
index 0000000..baef4e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-13.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fdump-tree-vect-details" } */
+
+void
+f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] + 15;
+ int tmp2 = tmp + b[i];
+ c[i] = tmp2 + b[i];
+ d[i] = tmp + tmp2 + b[i];
+ }
+}
+
+void
+f2 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = 15 - b[i];
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i];
+ }
+}
+
+void
+f3 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] & 15;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i];
+ }
+}
+
+void
+f4 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] | 15;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i];
+ }
+}
+
+void
+f5 (int *restrict a, int *restrict b, int *restrict c, int *restrict d, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int tmp = b[i] ^ 15;
+ int tmp2 = tmp * b[i];
+ c[i] = tmp2 * b[i];
+ d[i] = tmp * tmp2 * b[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 5 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {e32,m2} } } */
+/* { dg-final { scan-assembler-not {e32,m1} } } */
+/* { dg-final { scan-assembler-times {ret} 5 } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 5 "vect" } } */
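A boundary case worth noting (a hypothetical example, not part of the test
above): a constant just outside the simm5 range cannot use a .vi form, so
the patched variable_vectorized_p still counts it toward register pressure.

  /* Hypothetical boundary example: 16 is outside [-16, 15], so
     vadd.vi cannot encode it and the constant is still counted by
     the liveness estimate.  */
  void
  out_of_range (int *restrict a, int *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      a[i] = b[i] + 16;
  }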