aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLehua Ding <lehua.ding@rivai.ai>2023-09-08 16:54:19 +0800
committerLehua Ding <lehua.ding@rivai.ai>2023-09-09 08:58:11 +0800
commit7547f65f60c0bbf8de704c569c92c7a0e31a6175 (patch)
tree6dace8b3bff6ea0ed3f3aae8a59a199b54369a23
parentfd0b95211180bb4cc785f6da6af3561016c77acd (diff)
downloadgcc-7547f65f60c0bbf8de704c569c92c7a0e31a6175.zip
gcc-7547f65f60c0bbf8de704c569c92c7a0e31a6175.tar.gz
gcc-7547f65f60c0bbf8de704c569c92c7a0e31a6175.tar.bz2
Support folding min(poly,poly) to const
This patch adds support for folding `MIN (poly, poly)` to a constant. Consider the following C code: ``` void foo2 (int* restrict a, int* restrict b, int n) { for (int i = 0; i < 3; i += 1) a[i] += b[i]; } ``` Before this patch: ``` void foo2 (int * restrict a, int * restrict b, int n) { vector([4,4]) int vect__7.27; vector([4,4]) int vect__6.26; vector([4,4]) int vect__4.23; unsigned long _32; <bb 2> [local count: 268435456]: _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>; vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0); vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0); vect__7.27_9 = vect__6.26_15 + vect__4.23_20; .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail call] return; } ``` After this patch: ``` void foo2 (int * restrict a, int * restrict b, int n) { vector([4,4]) int vect__7.27; vector([4,4]) int vect__6.26; vector([4,4]) int vect__4.23; <bb 2> [local count: 268435456]: vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0); vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0); vect__7.27_9 = vect__6.26_15 + vect__4.23_20; .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call] return; } ``` For RISC-V RVV, csrr and branch instructions can be reduced: Before this patch: ``` foo2: csrr a4,vlenb srli a4,a4,2 li a5,3 bleu a5,a4,.L5 mv a5,a4 .L5: vsetvli zero,a5,e32,m1,ta,ma ... ``` After this patch: ``` foo2: vsetivli zero,3,e32,m1,ta,ma ... ``` gcc/ChangeLog: * fold-const.cc (can_min_p): New function. (poly_int_binop): Try fold MIN_EXPR. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust. * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust. * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test.
-rw-r--r--gcc/fold-const.cc24
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c24
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c2
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c2
4 files changed, 50 insertions, 2 deletions
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 1da498a..d19b466 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res,
return true;
}
+/* Return true if known_le can order ARG1 and ARG2, storing the smaller of the
+ two (ARG1 on a tie) in RES. Otherwise return false and leave RES unchanged. */
+bool
+can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res)
+{
+ if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest (arg2)))
+ {
+ res = wi::to_poly_wide (arg1);
+ return true;
+ }
+ else if (known_le (wi::to_poly_widest (arg2), wi::to_poly_widest (arg1)))
+ {
+ res = wi::to_poly_wide (arg2);
+ return true;
+ }
+
+ return false;
+}
+
/* Combine two poly int's ARG1 and ARG2 under operation CODE to
produce a new constant in RES. Return FALSE if we don't know how
to evaluate CODE at compile-time. */
@@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code code,
return false;
break;
+ case MIN_EXPR:
+ if (!can_min_p (arg1, arg2, res))
+ return false;
+ break;
+
default:
return false;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
new file mode 100644
index 0000000..de4c472
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 -fno-vect-cost-model" } */
+
+void foo1 (int* restrict a, int* restrict b, int n) /* Trip count 4; N unused. */
+{
+ for (int i = 0; i < 4; i += 1)
+ a[i] += b[i];
+}
+
+void foo2 (int* restrict a, int* restrict b, int n) /* Trip count 3; N unused. */
+{
+ for (int i = 0; i < 3; i += 1)
+ a[i] += b[i];
+}
+
+void foo3 (int* restrict a, int* restrict b, int n) /* Trip count 5; N unused. */
+{
+ for (int i = 0; i < 5; i += 1)
+ a[i] += b[i];
+}
+
+/* { dg-final { scan-assembler-not {\tcsrr\t} } } */
+/* { dg-final { scan-assembler {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */
+/* { dg-final { scan-assembler {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
index f3388a8..40224c6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c
@@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /)
/* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */
/* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */
-/* { dg-final { scan-assembler-times {csrr} 19 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
index 98822b1..b34a349 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
@@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<)
/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */
/* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */
-/* { dg-final { scan-assembler-times {csrr} 18 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */