aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPan Li <pan2.li@intel.com>2024-10-23 16:46:53 +0800
committerPan Li <pan2.li@intel.com>2024-10-29 22:19:41 +0800
commit30435cc261071d389d9a210f598170ecdd5ea13c (patch)
tree9a8b9f19f378e340a16e7269af85d4b0a5d0800b
parent372060d78715d9a4ab756b1b95796bd04c0be2bf (diff)
downloadgcc-30435cc261071d389d9a210f598170ecdd5ea13c.zip
gcc-30435cc261071d389d9a210f598170ecdd5ea13c.tar.gz
gcc-30435cc261071d389d9a210f598170ecdd5ea13c.tar.bz2
RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}
This patch would like to implement the MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V backend by leveraging the vector strided load/store insn. For example: void foo (int * __restrict a, int * __restrict b, int stride, int n) { for (int i = 0; i < n; i++) a[i*stride] = b[i*stride] + 100; } Before this patch: 38 │ vsetvli a5,a3,e32,m1,ta,ma 39 │ vluxei64.v v1,(a1),v4 40 │ mul a4,a2,a5 41 │ sub a3,a3,a5 42 │ vadd.vv v1,v1,v2 43 │ vsuxei64.v v1,(a0),v4 44 │ add a1,a1,a4 45 │ add a0,a0,a4 After this patch: 33 │ vsetvli a5,a3,e32,m1,ta,ma 34 │ vlse32.v v1,0(a1),a2 35 │ mul a4,a2,a5 36 │ sub a3,a3,a5 37 │ vadd.vv v1,v1,v2 38 │ vsse32.v v1,0(a0),a2 39 │ add a1,a1,a4 40 │ add a0,a0,a4 The below test suites are passed for this patch: * The full riscv regression test. gcc/ChangeLog: * config/riscv/autovec.md (mask_len_strided_load_<mode>): Add new pattern for MASK_LEN_STRIDED_LOAD. (mask_len_strided_store_<mode>): Ditto but for store. * config/riscv/riscv-protos.h (expand_strided_load): Add new func decl to expand strided load. (expand_strided_store): Ditto but for store. * config/riscv/riscv-v.cc (expand_strided_load): Add new func impl to expand strided load. (expand_strided_store): Ditto but for store. Signed-off-by: Pan Li <pan2.li@intel.com> Co-Authored-By: Juzhe-Zhong <juzhe.zhong@rivai.ai>
-rw-r--r--gcc/config/riscv/autovec.md29
-rw-r--r--gcc/config/riscv/riscv-protos.h2
-rw-r--r--gcc/config/riscv/riscv-v.cc52
3 files changed, 83 insertions, 0 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 774a3d3..1f1849d 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2889,3 +2889,32 @@
DONE;
}
)
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+;; Expander for the mask_len_strided_load_<mode> pattern, available when
+;; TARGET_VECTOR holds.  It emits no RTL template of its own: the whole
+;; expansion is delegated to riscv_vector::expand_strided_load, which is
+;; handed the vector mode and the operand array.
+;;
+;; Operand roles, as read by expand_strided_load:
+;;   0 - destination vector register (mode V)
+;;   1 - base address
+;;   2 - stride
+;;   3 - mask (mode <VM>)
+;;   4 - length
+;;   5 - must be constant zero; expand_strided_load does not read it
+;;       (NOTE(review): presumably the bias operand -- confirm against
+;;       the standard pattern documentation).
+(define_expand "mask_len_strided_load_<mode>"
+ [(match_operand:V 0 "register_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand 2 "pmode_reg_or_0_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_load (<MODE>mode, operands);
+ DONE;
+ })
+
+;; Expander for the mask_len_strided_store_<mode> pattern, available when
+;; TARGET_VECTOR holds.  It emits no RTL template of its own: the whole
+;; expansion is delegated to riscv_vector::expand_strided_store, which is
+;; handed the vector mode and the operand array.
+;;
+;; Operand roles, as read by expand_strided_store:
+;;   0 - base address
+;;   1 - stride
+;;   2 - vector register holding the value to store (mode V)
+;;   3 - mask (mode <VM>)
+;;   4 - length
+;;   5 - must be constant zero; expand_strided_store does not read it
+;;       (NOTE(review): presumably the bias operand -- confirm against
+;;       the standard pattern documentation).
+(define_expand "mask_len_strided_store_<mode>"
+ [(match_operand 0 "pmode_reg_or_0_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand:V 2 "register_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_store (<MODE>mode, operands);
+ DONE;
+ })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 54f472a..0a6b43f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, rtx);
bool expand_vec_setmem (rtx, rtx, rtx);
bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+/* Expanders behind the mask_len_strided_load/store patterns; each takes
+   the vector mode and the pattern's operand array.  */
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c48b872..209b7ee 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
}
}
+/* Expand MASK_LEN_STRIDED_LOAD for vector mode MODE.
+
+   OPS is the expander's operand array: OPS[0] is the destination vector
+   register, OPS[1] the base address, OPS[2] the stride, OPS[3] the mask
+   and OPS[4] the length.
+
+   When the length is a poly_int constant known to equal the number of
+   units in MODE, the insn is emitted through emit_vlmax_insn; otherwise
+   the length is handed to emit_nonvlmax_insn.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx dest = ops[0];
+  rtx addr = ops[1];
+  rtx step = ops[2];
+  rtx pred = ops[3];
+  rtx vl = ops[4];
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx insn_ops[] = {dest, pred, gen_rtx_MEM (mode, addr), step};
+
+  poly_int64 vl_val;
+  bool full_vector_p = (poly_int_rtx_p (vl, &vl_val)
+                        && known_eq (vl_val, GET_MODE_NUNITS (mode)));
+
+  if (full_vector_p)
+    {
+      emit_vlmax_insn (icode, BINARY_OP_TAMA, insn_ops);
+      return;
+    }
+
+  /* A length that does not satisfy constraint K is forced into a Pmode
+     register before being used.  */
+  if (!satisfies_constraint_K (vl))
+    vl = force_reg (Pmode, vl);
+
+  emit_nonvlmax_insn (icode, BINARY_OP_TAMA, insn_ops, vl);
+}
+
+/* Expand MASK_LEN_STRIDED_STORE for vector mode MODE.
+
+   OPS is the expander's operand array: OPS[0] is the base address,
+   OPS[1] the stride, OPS[2] the vector register to store, OPS[3] the
+   mask and OPS[4] the length.
+
+   When the length is a poly_int constant known to equal the number of
+   units in MODE, a VLMAX vsetvl is emitted into a fresh Pmode pseudo and
+   that pseudo is used as the length with the VLMAX avl type; otherwise
+   the given length is used with the NONVLMAX avl type.  */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+  rtx addr = ops[0];
+  rtx step = ops[1];
+  rtx src = ops[2];
+  rtx pred = ops[3];
+  rtx vl = ops[4];
+
+  poly_int64 vl_val;
+  bool full_vector_p = (poly_int_rtx_p (vl, &vl_val)
+                        && known_eq (vl_val, GET_MODE_NUNITS (mode)));
+
+  rtx avl_type;
+  if (full_vector_p)
+    {
+      vl = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (mode, vl);
+      avl_type = get_avl_type_rtx (VLMAX);
+    }
+  else
+    {
+      /* A length that does not satisfy constraint K is forced into a
+         Pmode register before being used.  */
+      if (!satisfies_constraint_K (vl))
+        vl = force_reg (Pmode, vl);
+      avl_type = get_avl_type_rtx (NONVLMAX);
+    }
+
+  rtx mem = gen_rtx_MEM (mode, addr);
+  emit_insn (gen_pred_strided_store (mode, mem, pred, step, src, vl,
+                                     avl_type));
+}
/* Return true if the operation is the floating-point operation need FRM. */
static bool