diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2024-01-06 10:08:55 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-01-06 10:12:43 +0800 |
commit | 9873f13d833b536b46cd6ff46d72e62407b048a8 (patch) | |
tree | 2954af305be21d4d5f75081bc4cba70261981af9 | |
parent | 5a0b3355d956f5d36f9b562e027b890cc5f61d88 (diff) | |
download | gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.zip gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.gz gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.bz2 |
RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move
V2: Address comments from Robin.
While working on fixing a bug, I notice this following code has redundant move:
#include "riscv_vector.h"
void
f (float x, float y, void *out)
{
float f[4] = { x, x, x, y };
vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
__riscv_vse32_v_f32m1 (out, v, 4);
}
Before this patch:
f:
vsetivli zero,4,e32,m1,ta,ma
addi sp,sp,-16
vfmv.v.f v1,fa0
vfslide1down.vf v1,v1,fa1
vmv.v.v v1,v1 ----> redundant move.
vse32.v v1,0(a0)
addi sp,sp,16
jr ra
The rootcause is that the complicate vmv.v.v pattern doesn't simplify it
into simple (set (reg) (reg)) reg-to-reg move pattern.
Currently, we support such simplification for VLMAX.
However, the case I found is non-VLMAX but with LEN = NUNITS which should be
considered as equivalent to VLMAX.
Add a simple fix for such situation.
Tested on both RV32/RV64 no regressions.
gcc/ChangeLog:
* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
* config/riscv/vector.md: Allow non-vlmax with len = NUNITS simplification.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/vf_avl-4.c: New test.
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 23 | ||||
-rw-r--r-- | gcc/config/riscv/vector.md | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c | 13 |
4 files changed, 39 insertions, 7 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 0f0337c..00a5b64 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode); bool can_be_broadcasted_p (rtx); bool gather_scatter_valid_offset_p (machine_mode); HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int); +bool whole_reg_to_reg_move_p (rtx *, machine_mode, int); } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index ec85964..2491522 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -5117,4 +5117,27 @@ estimated_poly_value (poly_int64 val, unsigned int kind) return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN; } +/* Return true it is whole register-register move. */ +bool +whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index) +{ + /* An operation is a whole-register move if either + (1) Its vlmax operand equals VLMAX + (2) Its vl operand equals the number of units of its mode. */ + if (register_operand (ops[0], mode) + && register_operand (ops[3], mode) + && satisfies_constraint_vu (ops[2]) + && satisfies_constraint_Wc1 (ops[1])) + { + if (INTVAL (ops[avl_type_index]) == VLMAX) + return true; + /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32 + into NON-VLMAX with LEN = NUNITS. */ + else if (CONST_INT_P (ops[4]) + && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode))) + return true; + } + return false; +} + } // namespace riscv_vector diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 3d2c1c3..be5beb5 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1724,10 +1724,7 @@ vse<sew>.v\t%3,%0%p1 vmv.v.v\t%0,%3 vmv.v.v\t%0,%3" - "&& register_operand (operands[0], <MODE>mode) - && register_operand (operands[3], <MODE>mode) - && satisfies_constraint_vu (operands[2]) - && INTVAL (operands[7]) == riscv_vector::VLMAX" + "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)" [(set (match_dup 0) (match_dup 3))] "" [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov") @@ -1776,9 +1773,7 @@ vmmv.m\t%0,%3 vmclr.m\t%0 vmset.m\t%0" - "&& register_operand (operands[0], <MODE>mode) - && register_operand (operands[3], <MODE>mode) - && INTVAL (operands[5]) == riscv_vector::VLMAX" + "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)" [(set (match_dup 0) (match_dup 3))] "" [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c new file mode 100644 index 0000000..1b4bfd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */ + +#include "riscv_vector.h" +void +f (float x, float y, void *out) +{ + float f[4] = { x, x, x, y }; + vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4); + __riscv_vse32_v_f32m1 (out, v, 4); +} + +/* { dg-final { scan-assembler-not {vmv} } } */ |