diff options
author | Craig Blackmore <craig.blackmore@embecosm.com> | 2024-11-04 13:55:19 -0700 |
---|---|---|
committer | Jeff Law <jlaw@ventanamicro.com> | 2024-11-04 13:55:19 -0700 |
commit | b30c6a5eabaf476663f1a1e41165967e782eccd3 (patch) | |
tree | 757153dde1595a5305c825c9673ff70279934af3 /gcc | |
parent | fe97ac43e05a8da8a12fbad2208a1ebb19d2d6c9 (diff) | |
download | gcc-b30c6a5eabaf476663f1a1e41165967e782eccd3.zip gcc-b30c6a5eabaf476663f1a1e41165967e782eccd3.tar.gz gcc-b30c6a5eabaf476663f1a1e41165967e782eccd3.tar.bz2 |
[PATCH v2 1/2] RISC-V: Make vectorized memset handle more cases
`expand_vec_setmem` only generated vectorized memset if it fitted into a
single vector store of at least (TARGET_MIN_VLEN / 8) bytes. Also,
without dynamic LMUL the operation was always TARGET_MAX_LMUL even if it
would have fitted a smaller LMUL.
Allow vectorized memset to be generated for smaller lengths and smaller
LMUL by switching to using use_vector_stringop_p. Smaller LMUL can be
seen in setmem-3.c:f3. Smaller lengths will be seen after the second
patch in this series which selectively disables by pieces.
gcc/ChangeLog:
* config/riscv/riscv-string.cc
(use_vector_stringop_p): Add comment.
(expand_vec_setmem): Use use_vector_stringop_p instead of
check_vectorise_memory_operation.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/riscv/riscv-string.cc | 37 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c | 6 |
2 files changed, 22 insertions, 21 deletions
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 118c02a..20395e1 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -1062,6 +1062,9 @@ struct stringop_info { MAX_EW is the maximum element width that the caller wants to use and LENGTH_IN is the length of the stringop in bytes. + + This is currently used for cpymem and setmem. If expand_vec_cmpmem switches + to using it too then check_vectorise_memory_operation can be removed. */ static bool @@ -1600,41 +1603,39 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out) bool expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in) { - HOST_WIDE_INT lmul; + stringop_info info; + /* Check we are able and allowed to vectorise this operation; bail if not. */ - if (!check_vectorise_memory_operation (length_in, lmul)) + if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop) return false; - machine_mode vmode - = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul) - .require (); rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0)); - rtx dst = change_address (dst_in, vmode, dst_addr); + rtx dst = change_address (dst_in, info.vmode, dst_addr); - rtx fill_value = gen_reg_rtx (vmode); + rtx fill_value = gen_reg_rtx (info.vmode); rtx broadcast_ops[] = { fill_value, fill_value_in }; /* If the length is exactly vlmax for the selected mode, do that. Otherwise, use a predicated store. 
*/ - if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in))) + if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl))) { - emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP, - broadcast_ops); + emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP, + broadcast_ops); emit_move_insn (dst, fill_value); } else { - if (!satisfies_constraint_K (length_in)) - length_in = force_reg (Pmode, length_in); - emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP, - broadcast_ops, length_in); + if (!satisfies_constraint_K (info.avl)) + info.avl = force_reg (Pmode, info.avl); + emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode), + riscv_vector::UNARY_OP, broadcast_ops, info.avl); machine_mode mask_mode - = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode)) - .require (); + = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode)) + .require (); rtx mask = CONSTM1_RTX (mask_mode); - emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in, - get_avl_type_rtx (riscv_vector::NONVLMAX))); + emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl, + get_avl_type_rtx (riscv_vector::NONVLMAX))); } return true; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c index 25be694..52766fe 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c @@ -21,13 +21,13 @@ f1 (void *a, int const b) return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1); } -/* Vectorise+inline minimum vector register width using requested lmul. +/* Vectorised code should use smallest lmul known to fit length. ** f2: ** ( -** vsetivli\s+zero,\d+,e8,m8,ta,ma +** vsetivli\s+zero,\d+,e8,m1,ta,ma ** | ** li\s+a\d+,\d+ -** vsetvli\s+zero,a\d+,e8,m8,ta,ma +** vsetvli\s+zero,a\d+,e8,m1,ta,ma ** ) ** vmv\.v\.x\s+v\d+,a1 ** vse8\.v\s+v\d+,0\(a0\) |