diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-05-05 14:33:44 +0800 |
---|---|---|
committer | Kito Cheng <kito.cheng@sifive.com> | 2023-05-05 21:18:20 +0800 |
commit | d875d75653ed67e125e70dd8530182ce7872af65 (patch) | |
tree | df8feb915ad35031d3fabfd8768fc60e0a5f4bc5 | |
parent | 0fa872c40af4261064c13a1e4c57fa840d098d80 (diff) | |
download | gcc-d875d75653ed67e125e70dd8530182ce7872af65.zip gcc-d875d75653ed67e125e70dd8530182ce7872af65.tar.gz gcc-d875d75653ed67e125e70dd8530182ce7872af65.tar.bz2 |
RISC-V: Fix PR109615
This patch is to fix following case:
void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
{
size_t vl = 101;
if (cond)
vl = m * 2;
else
vl = m * 2 * vl;
for (size_t i = 0; i < n; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
__riscv_vse8_v_i8mf8 (out + i, v, vl);
vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in + i + 100, vl);
__riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
}
for (size_t i = 0; i < n; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
__riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
}
}
The value of "vl" is coming from different blocks so it will be wrapped as a PHI node of each
block.
In the first loop, the "vl" source is a PHI node from bb 4.
In the second loop, the "vl" source is a PHI node from bb 5.
since bb 5 is dominated by bb 4, the PHI input of "vl" in the second loop is the PHI node of "vl"
in bb 4.
So when 2 "vl" PHI node are both degenerate PHI node (the phi->num_inputs () == 1) and their only
input are same, it's safe for us to consider they are compatible.
This patch is only optimize degenerate PHI since it's safe and simple optimization.
non-dengerate PHI are considered as incompatible unless the PHI are the same in RTL_SSA.
TODO: non-generate PHI is complicated, we can support it when it is necessary in the future.
Before this patch:
...
.L2:
addi a4,a1,100
add t1,a0,a2
mv t0,a0
beq a2,zero,.L1
vsetvli zero,a3,e8,mf8,tu,mu
.L4:
addi a6,t0,100
addi a7,a4,-100
vle8.v v1,0(t0)
addi t0,t0,1
vse8.v v1,0(a7)
vlm.v v0,0(a6)
vle8.v v1,0(a6),v0.t
vse8.v v1,0(a4)
addi a4,a4,1
bne t0,t1,.L4
addi a0,a0,300
addi a1,a1,300
add a2,a0,a2
vsetvli zero,a3,e8,mf8,ta,ma
.L5:
vle8.v v2,0(a0)
addi a0,a0,1
vse8.v v2,0(a1)
addi a1,a1,1
bne a2,a0,.L5
.L1:
ret
After this patch:
...
.L2:
addi a4,a1,100
add t1,a0,a2
mv t0,a0
beq a2,zero,.L1
vsetvli zero,a3,e8,mf8,tu,mu
.L4:
addi a6,t0,100
addi a7,a4,-100
vle8.v v1,0(t0)
addi t0,t0,1
vse8.v v1,0(a7)
vlm.v v0,0(a6)
vle8.v v1,0(a6),v0.t
vse8.v v1,0(a4)
addi a4,a4,1
bne t0,t1,.L4
addi a0,a0,300
addi a1,a1,300
add a2,a0,a2
.L5:
vle8.v v2,0(a0)
addi a0,a0,1
vse8.v v2,0(a1)
addi a1,a1,1
bne a2,a0,.L5
.L1:
ret
PR target/109615
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (avl_info::multiple_source_equal_p): Add
denegrate PHI optmization.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/avl_single-74.c: Adapt testcase.
* gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr109615.c: New test.
-rw-r--r-- | gcc/config/riscv/riscv-vsetvl.cc | 83 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-74.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109615.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c | 2 |
4 files changed, 56 insertions, 66 deletions
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 609f86d..b9172e5 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1676,72 +1676,29 @@ avl_info::single_source_equal_p (const avl_info &other) const bool avl_info::multiple_source_equal_p (const avl_info &other) const { - /* TODO: We don't do too much optimization here since it's - too complicated in case of analyzing the PHI node. + /* When the def info is same in RTL_SSA namespace, it's safe + to consider they are avl compatible. */ + if (m_source == other.get_source ()) + return true; - For example: - void f (void * restrict in, void * restrict out, int n, int m, int cond) - { - size_t vl; - switch (cond) - { - case 1: - vl = 100; - break; - case 2: - vl = *(size_t*)(in + 100); - break; - case 3: - { - size_t new_vl = *(size_t*)(in + 500); - size_t new_vl2 = *(size_t*)(in + 600); - vl = new_vl + new_vl2 + 777; - break; - } - default: - vl = 4000; - break; - } - for (size_t i = 0; i < n; i++) - { - vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); - __riscv_vse8_v_i8mf8 (out + i, v, vl); + /* We only consider handle PHI node. */ + if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ()) + return false; - vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); - __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); - } + phi_info *phi1 = as_a<phi_info *> (m_source); + phi_info *phi2 = as_a<phi_info *> (other.get_source ()); - size_t vl2; - switch (cond) - { - case 1: - vl2 = 100; - break; - case 2: - vl2 = *(size_t*)(in + 100); - break; - case 3: - { - size_t new_vl = *(size_t*)(in + 500); - size_t new_vl2 = *(size_t*)(in + 600); - vl2 = new_vl + new_vl2 + 777; - break; - } - default: - vl2 = 4000; - break; - } - for (size_t i = 0; i < m; i++) - { - vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2); - __riscv_vse8_v_i8mf8 (out + i + 300, v, vl2); - vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2); - __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2); - } - } - Such case may not be necessary to optimize since the codes of defining - vl and vl2 are redundant. */ - return m_source == other.get_source (); + if (phi1->is_degenerate () && phi2->is_degenerate ()) + { + /* Degenerate PHI means the PHI node only have one input. */ + + /* If both PHI nodes have the same single input in use list. + We consider they are AVL compatible. */ + if (phi1->input_value (0) == phi2->input_value (0)) + return true; + } + /* TODO: We can support more optimization cases in the future. */ + return false; } avl_info & diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-74.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-74.c index ff540ec..cc4f88b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-74.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-74.c @@ -23,5 +23,5 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int cond, size_t vl, } } -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109615.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109615.c new file mode 100644 index 0000000..90b0bb7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109615.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */ + +#include "riscv_vector.h" + +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond) +{ + size_t vl = 101; + if (cond) + vl = m * 2; + else + vl = m * 2 * vl; + + for (size_t i = 0; i < n; i++) + { + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); + __riscv_vse8_v_i8mf8 (out + i, v, vl); + + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl); + + vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tumu (mask, v, in + i + 100, vl); + __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); + } + + for (size_t i = 0; i < n; i++) + { + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); + __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); + } +} + +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c index fa825f0..3ef0fdc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c } } -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ |