diff options
author | Pan Li <pan2.li@intel.com> | 2024-10-23 16:46:53 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-10-29 22:19:41 +0800 |
commit | 30435cc261071d389d9a210f598170ecdd5ea13c (patch) | |
tree | 9a8b9f19f378e340a16e7269af85d4b0a5d0800b | |
parent | 372060d78715d9a4ab756b1b95796bd04c0be2bf (diff) | |
download | gcc-30435cc261071d389d9a210f598170ecdd5ea13c.zip gcc-30435cc261071d389d9a210f598170ecdd5ea13c.tar.gz gcc-30435cc261071d389d9a210f598170ecdd5ea13c.tar.bz2 |
RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}
This patch would like to implement the MASK_LEN_STRIDED_LOAD{STORE} in
the RISC-V backend by leveraging the vector strided load/store insn.
For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
for (int i = 0; i < n; i++)
a[i*stride] = b[i*stride] + 100;
}
Before this patch:
38 │ vsetvli a5,a3,e32,m1,ta,ma
39 │ vluxei64.v v1,(a1),v4
40 │ mul a4,a2,a5
41 │ sub a3,a3,a5
42 │ vadd.vv v1,v1,v2
43 │ vsuxei64.v v1,(a0),v4
44 │ add a1,a1,a4
45 │ add a0,a0,a4
After this patch:
33 │ vsetvli a5,a3,e32,m1,ta,ma
34 │ vlse32.v v1,0(a1),a2
35 │ mul a4,a2,a5
36 │ sub a3,a3,a5
37 │ vadd.vv v1,v1,v2
38 │ vsse32.v v1,0(a0),a2
39 │ add a1,a1,a4
40 │ add a0,a0,a4
The following test suites pass with this patch:
* The full RISC-V regression test.
gcc/ChangeLog:
* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
new pattern for MASK_LEN_STRIDED_LOAD.
(mask_len_strided_store_<mode>): Ditto but for store.
* config/riscv/riscv-protos.h (expand_strided_load): Add new
func decl to expand strided load.
(expand_strided_store): Ditto but for store.
* config/riscv/riscv-v.cc (expand_strided_load): Add new
func impl to expand strided load.
(expand_strided_store): Ditto but for store.
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-Authored-By: Juzhe-Zhong <juzhe.zhong@rivai.ai>
-rw-r--r-- | gcc/config/riscv/autovec.md | 29 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 52 |
3 files changed, 83 insertions, 0 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 774a3d3..1f1849d 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2889,3 +2889,32 @@ DONE; } ) + +;; ========================================================================= +;; == Strided Load/Store +;; ========================================================================= +(define_expand "mask_len_strided_load_<mode>" + [(match_operand:V 0 "register_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand 2 "pmode_reg_or_0_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_load (<MODE>mode, operands); + DONE; + }) + +(define_expand "mask_len_strided_store_<mode>" + [(match_operand 0 "pmode_reg_or_0_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand:V 2 "register_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_store (<MODE>mode, operands); + DONE; + }) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 54f472a..0a6b43f 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, rtx); bool expand_vec_setmem (rtx, rtx, rtx); bool expand_vec_cmpmem (rtx, rtx, rtx, rtx); +void expand_strided_load (machine_mode, rtx *); +void expand_strided_store (machine_mode, rtx *); /* Rounding mode bitfield for fixed point VXRM. 
*/ enum fixed_point_rounding_mode diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index c48b872..209b7ee 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load) } } +/* Expand MASK_LEN_STRIDED_LOAD. */ +void +expand_strided_load (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[0]; + rtx base = ops[1]; + rtx stride = ops[2]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + + insn_code icode = code_for_pred_strided_load (mode); + rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride}; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops); + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len); + } +} + +/* Expand MASK_LEN_STRIDED_STORE. */ +void +expand_strided_store (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[2]; + rtx base = ops[0]; + rtx stride = ops[1]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + rtx vl_type; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + { + len = gen_reg_rtx (Pmode); + emit_vlmax_vsetvl (mode, len); + vl_type = get_avl_type_rtx (VLMAX); + } + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + vl_type = get_avl_type_rtx (NONVLMAX); + } + + emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base), + mask, stride, v_reg, len, vl_type)); +} /* Return true if the operation is the floating-point operation need FRM. */ static bool |