aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Dapp <rdapp@ventanamicro.com>2025-08-07 09:26:09 +0200
committerRobin Dapp <rdapp@ventanamicro.com>2025-09-02 17:24:40 +0200
commit9bbf0996bfcd249ef474a3da19831f66a8714319 (patch)
tree9042b538a29f63f6ebf85822767337de9e6f5303
parentf957d352bd6f240829226405e9be7960071d1b9c (diff)
downloadgcc-9bbf0996bfcd249ef474a3da19831f66a8714319.zip
gcc-9bbf0996bfcd249ef474a3da19831f66a8714319.tar.gz
gcc-9bbf0996bfcd249ef474a3da19831f66a8714319.tar.bz2
RISC-V: Fix is_vlmax_len_p and use for strided ops.
This patch changes is_vlmax_len_p to handle VLS modes properly. Before, we would check if len == GET_MODE_NUNITS (mode). This works for VLA modes but not necessarily for VLS modes. We regularly have e.g. small VLS modes where LEN equals their number of units but which do not span a full vector. Therefore now check if len * GET_MODE_UNIT_SIZE (mode) equals BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL. Changing this uncovered an oversight in avlprop where we used GET_MODE_NUNITS as AVL when GET_MODE_NUNITS / NF would be correct. The testsuite is unchanged. I didn't bother to add a dedicated test because we would have seen the fallout anyway once the gather patch lands. gcc/ChangeLog: * config/riscv/riscv-v.cc (is_vlmax_len_p): Properly handle VLS modes. (imm_avl_p): Fix VLS length check. (expand_strided_load): Use is_vlmax_len_p. (expand_strided_store): Ditto. * config/riscv/riscv-avlprop.cc (pass_avlprop::execute): Use GET_MODE_NUNITS / NF as avl.
-rw-r--r--gcc/config/riscv/riscv-avlprop.cc9
-rw-r--r--gcc/config/riscv/riscv-v.cc31
2 files changed, 30 insertions, 10 deletions
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index e31fdeb..b8547a7 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -535,7 +535,14 @@ pass_avlprop::execute (function *fn)
&& !m_avl_propagations->get (candidate.second)
&& imm_avl_p (vtype_mode))
{
- rtx new_avl = gen_int_mode (GET_MODE_NUNITS (vtype_mode), Pmode);
+ /* For segmented operations AVL refers to a single register and
+ not all NF registers. Therefore divide the number of units of the
+ mode by NF to obtain the proper AVL. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (vtype_mode))
+ nf = get_nf (vtype_mode);
+ rtx new_avl = gen_int_mode
+ (GET_MODE_NUNITS (vtype_mode).to_constant () / nf, Pmode);
simplify_replace_vlmax_avl (rinsn, new_avl);
}
}
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9cbd480..b27a0be 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -63,20 +63,37 @@ imm_avl_p (machine_mode mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (mode);
+ /* For segmented operations AVL refers to a single register and not all NF
+ registers. Therefore divide the number of units by NF before checking if
+ it is in range. */
+ int nf = 1;
+ if (riscv_v_ext_tuple_mode_p (mode))
+ nf = get_nf (mode);
+
return nunits.is_constant ()
/* The vsetivli can only hold register 0~31. */
- ? (IN_RANGE (nunits.to_constant (), 0, 31))
+ ? (IN_RANGE (nunits.to_constant () / nf, 0, 31))
/* Only allowed in VLS-VLMAX mode. */
: false;
}
-/* Return true if LEN is equal to NUNITS that out of the range [0, 31]. */
+/* Return true if LEN equals the number of units in MODE and MODE is either a
+ VLA mode or a VLS mode whose size equals the vector size.
+ In that case we can emit a VLMAX insn which can be optimized more easily
+ by the vsetvl pass. */
+
static bool
is_vlmax_len_p (machine_mode mode, rtx len)
{
poly_int64 value;
+ if (poly_int_rtx_p (len, &value)
+ && known_eq (value, GET_MODE_NUNITS (mode))
+ && known_eq (GET_MODE_UNIT_SIZE (mode) * value, BYTES_PER_RISCV_VECTOR))
+ return true;
+
return poly_int_rtx_p (len, &value)
- && known_eq (value, GET_MODE_NUNITS (mode));
+ && !GET_MODE_NUNITS (mode).is_constant ()
+ && known_eq (value, GET_MODE_NUNITS (mode));
}
/* Helper functions for insn_flags && insn_types */
@@ -4470,13 +4487,11 @@ expand_strided_load (machine_mode mode, rtx *ops)
int idx = 4;
get_else_operand (ops[idx++]);
rtx len = ops[idx];
- poly_int64 len_val;
insn_code icode = code_for_pred_strided_load (mode);
rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
else
{
@@ -4494,11 +4509,9 @@ expand_strided_store (machine_mode mode, rtx *ops)
rtx stride = ops[1];
rtx mask = ops[3];
rtx len = ops[4];
- poly_int64 len_val;
rtx vl_type;
- if (poly_int_rtx_p (len, &len_val)
- && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
len = gen_reg_rtx (Pmode);
emit_vlmax_vsetvl (mode, len);