diff options
author | Robin Dapp <rdapp@ventanamicro.com> | 2025-08-07 09:26:09 +0200 |
---|---|---|
committer | Robin Dapp <rdapp@ventanamicro.com> | 2025-09-02 17:24:40 +0200 |
commit | 9bbf0996bfcd249ef474a3da19831f66a8714319 (patch) | |
tree | 9042b538a29f63f6ebf85822767337de9e6f5303 | |
parent | f957d352bd6f240829226405e9be7960071d1b9c (diff) | |
download | gcc-9bbf0996bfcd249ef474a3da19831f66a8714319.zip gcc-9bbf0996bfcd249ef474a3da19831f66a8714319.tar.gz gcc-9bbf0996bfcd249ef474a3da19831f66a8714319.tar.bz2 |
RISC-V: Fix is_vlmax_len_p and use for strided ops.
This patch changes is_vlmax_len_p to handle VLS modes properly.
Before we would check if len == GET_MODE_NUNITS (mode). This works for
VLA modes but not necessarily for VLS modes. We regularly have e.g.
small VLS modes where LEN equals their number of units but which do not
span a full vector. Therefore now check if len * GET_MODE_UNIT_SIZE
(mode) equals BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL.
Changing this uncovered an oversight in avlprop where we used
GET_MODE_NUNITS as AVL when GET_MODE_NUNITS / NF would be correct.
The testsuite is unchanged. I didn't bother to add a dedicated test
because we would have seen the fallout anyway once the gather patch
lands.
gcc/ChangeLog:
* config/riscv/riscv-v.cc (is_vlmax_len_p): Properly handle VLS
modes.
(imm_avl_p): Fix VLS length check.
(expand_strided_load): Use is_vlmax_len_p.
(expand_strided_store): Ditto.
* config/riscv/riscv-avlprop.cc (pass_avlprop::execute):
Use GET_MODE_NUNITS / NF as avl.
-rw-r--r-- | gcc/config/riscv/riscv-avlprop.cc | 9 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 31 |
2 files changed, 30 insertions, 10 deletions
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index e31fdeb..b8547a7 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -535,7 +535,14 @@ pass_avlprop::execute (function *fn) && !m_avl_propagations->get (candidate.second) && imm_avl_p (vtype_mode)) { - rtx new_avl = gen_int_mode (GET_MODE_NUNITS (vtype_mode), Pmode); + /* For segmented operations AVL refers to a single register and + not all NF registers. Therefore divide the mode size by NF + to obtain the proper AVL. */ + int nf = 1; + if (riscv_v_ext_tuple_mode_p (vtype_mode)) + nf = get_nf (vtype_mode); + rtx new_avl = gen_int_mode + (GET_MODE_NUNITS (vtype_mode).to_constant () / nf, Pmode); simplify_replace_vlmax_avl (rinsn, new_avl); } } diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9cbd480..b27a0be 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -63,20 +63,37 @@ imm_avl_p (machine_mode mode) { poly_uint64 nunits = GET_MODE_NUNITS (mode); + /* For segmented operations AVL refers to a single register and not all NF + registers. Therefore divide the mode size by NF before checking if it is + in range. */ + int nf = 1; + if (riscv_v_ext_tuple_mode_p (mode)) + nf = get_nf (mode); + return nunits.is_constant () /* The vsetivli can only hold register 0~31. */ - ? (IN_RANGE (nunits.to_constant (), 0, 31)) + ? (IN_RANGE (nunits.to_constant () / nf, 0, 31)) /* Only allowed in VLS-VLMAX mode. */ : false; } -/* Return true if LEN is equal to NUNITS that out of the range [0, 31]. */ +/* Return true if LEN equals the number of units in MODE if MODE is either a + VLA mode or MODE is a VLS mode its size equals the vector size. + In that case we can emit a VLMAX insn which can be optimized more easily + by the vsetvl pass. 
*/ + static bool is_vlmax_len_p (machine_mode mode, rtx len) { poly_int64 value; + if (poly_int_rtx_p (len, &value) + && known_eq (value, GET_MODE_NUNITS (mode)) + && known_eq (GET_MODE_UNIT_SIZE (mode) * value, BYTES_PER_RISCV_VECTOR)) + return true; + return poly_int_rtx_p (len, &value) - && known_eq (value, GET_MODE_NUNITS (mode)); + && !GET_MODE_NUNITS (mode).is_constant () + && known_eq (value, GET_MODE_NUNITS (mode)); } /* Helper functions for insn_flags && insn_types */ @@ -4470,13 +4487,11 @@ expand_strided_load (machine_mode mode, rtx *ops) int idx = 4; get_else_operand (ops[idx++]); rtx len = ops[idx]; - poly_int64 len_val; insn_code icode = code_for_pred_strided_load (mode); rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride}; - if (poly_int_rtx_p (len, &len_val) - && known_eq (len_val, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops); else { @@ -4494,11 +4509,9 @@ expand_strided_store (machine_mode mode, rtx *ops) rtx stride = ops[1]; rtx mask = ops[3]; rtx len = ops[4]; - poly_int64 len_val; rtx vl_type; - if (poly_int_rtx_p (len, &len_val) - && known_eq (len_val, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) { len = gen_reg_rtx (Pmode); emit_vlmax_vsetvl (mode, len); |