aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-12-27 10:38:26 +0800
committerPan Li <pan2.li@intel.com>2023-12-29 08:38:03 +0800
commitd1eacedc6d9ba9f5522f2c8d49ccfdf7939ad72d (patch)
tree02bff2f39a932cf20db291a00c5dbb7c20ebd576 /gcc/config
parent7de05ad450044927eb1eb0259d5dd1aa385f4325 (diff)
downloadgcc-d1eacedc6d9ba9f5522f2c8d49ccfdf7939ad72d.zip
gcc-d1eacedc6d9ba9f5522f2c8d49ccfdf7939ad72d.tar.gz
gcc-d1eacedc6d9ba9f5522f2c8d49ccfdf7939ad72d.tar.bz2
RISC-V: Disallow transformation into VLMAX AVL for cond_len_xxx when length is in range [0, 31]
Notice that we have the following situation:

  vsetivli zero,4,e32,m1,ta,ma
  vlseg4e32.v v4,(a5)
  vlseg4e32.v v12,(a3)
  vsetvli a5,zero,e32,m1,tu,ma   ---> This is redundant since VLMAX AVL = 4 when it is fixed-vlmax
  vfadd.vf v3,v13,fa0
  vfadd.vf v1,v12,fa1
  vfmul.vv v17,v3,v5
  vfmul.vv v16,v1,v5

The root cause is that we blindly transform COND_LEN_xxx into VLMAX AVL when len == NUNITS. However, we don't need to transform all of them: when len is in the range [0, 31], we don't need to consume a scalar register.

After this patch:

  vsetivli zero,4,e32,m1,tu,ma
  addi a4,a5,400
  vlseg4e32.v v12,(a3)
  vfadd.vf v3,v13,fa0
  vfadd.vf v1,v12,fa1
  vlseg4e32.v v4,(a4)
  vfadd.vf v2,v14,fa1
  vfmul.vv v17,v3,v5
  vfmul.vv v16,v1,v5

Tested on both RV32 and RV64, no regression. Ok for trunk?

gcc/ChangeLog:

  * config/riscv/riscv-v.cc (is_vlmax_len_p): New function.
  (expand_load_store): Disallow transformation into VLMAX when len is in range of [0,31]
  (expand_cond_len_op): Ditto.
  (expand_gather_scatter): Ditto.
  (expand_lanes_load_store): Ditto.
  (expand_fold_extract_last): Ditto.

gcc/testsuite/ChangeLog:

  * gcc.target/riscv/rvv/autovec/post-ra-avl.c: Adapt test.
  * gcc.target/riscv/rvv/base/vf_avl-2.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/riscv/riscv-v.cc21
1 files changed, 15 insertions, 6 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 038ab08..b4c7e0f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -68,6 +68,16 @@ imm_avl_p (machine_mode mode)
: false;
}
+/* Return true if LEN is a constant equal to GET_MODE_NUNITS (MODE) and
+   does not satisfy constraint K, i.e. it lies outside the range [0, 31].
+   Callers use a true result to treat the operation as a VLMAX one.  A
+   length inside [0, 31] is deliberately rejected even when it equals
+   NUNITS: such a length does not need to consume a scalar register, so
+   switching to a VLMAX AVL would only add a redundant vsetvli.  */
+static bool
+is_vlmax_len_p (machine_mode mode, rtx len)
+{
+ poly_int64 value;
+ return poly_int_rtx_p (len, &value)
+ && known_eq (value, GET_MODE_NUNITS (mode))
+ && !satisfies_constraint_K (len);
+}
+
/* Helper functions for insn_flags && insn_types */
/* Return true if caller need pass mask operand for insn pattern with
@@ -3776,7 +3786,7 @@ expand_load_store (rtx *ops, bool is_load)
rtx len = ops[3];
machine_mode mode = GET_MODE (ops[0]);
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
/* If the length operand is equal to VF, it is VLMAX load/store. */
if (is_load)
@@ -3842,8 +3852,7 @@ expand_cond_len_op (unsigned icode, insn_flags op_type, rtx *ops, rtx len)
machine_mode mask_mode = GET_MODE (mask);
poly_int64 value;
bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));
- bool is_vlmax_len
- = poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode));
+ bool is_vlmax_len = is_vlmax_len_p (mode, len);
unsigned insn_flags = HAS_DEST_P | HAS_MASK_P | HAS_MERGE_P | op_type;
if (is_dummy_mask)
@@ -4012,7 +4021,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode);
poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
poly_int64 value;
- bool is_vlmax = poly_int_rtx_p (len, &value) && known_eq (value, nunits);
+ bool is_vlmax = is_vlmax_len_p (vec_mode, len);
/* Extend the offset element to address width. */
if (inner_offsize < BITS_PER_WORD)
@@ -4199,7 +4208,7 @@ expand_lanes_load_store (rtx *ops, bool is_load)
rtx reg = is_load ? ops[0] : ops[1];
machine_mode mode = GET_MODE (ops[0]);
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
/* If the length operand is equal to VF, it is VLMAX load/store. */
if (is_load)
@@ -4252,7 +4261,7 @@ expand_fold_extract_last (rtx *ops)
rtx slide_vect = gen_reg_rtx (mode);
insn_code icode;
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
len = NULL_RTX;
/* Calculate the number of 1-bit in mask. */