RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move

V2: Address comments from Robin. While working on fixing a bug, I notice this following code has redundant move: #include "riscv_vector.h" void f (float x, float y, void *out) { float f[4] = { x, x, x, y }; vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4); __riscv_vse32_v_f32m1 (out, v, 4); } Before this patch: f: vsetivli zero,4,e32,m1,ta,ma addi sp,sp,-16 vfmv.v.f v1,fa0 vfslide1down.vf v1,v1,fa1 vmv.v.v v1,v1 ----> redundant move. vse32.v v1,0(a0) addi sp,sp,16 jr ra The rootcause is that the complicate vmv.v.v pattern doesn't simplify it into simple (set (reg) (reg)) reg-to-reg move pattern. Currently, we support such simplification for VLMAX. However, the case I found is non-VLMAX but with LEN = NUNITS which should be considered as equivalent to VLMAX. Add a simple fix for such situation. Tested on both RV32/RV64 no regressions. gcc/ChangeLog: * config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function. * config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto. * config/riscv/vector.md: Allow non-vlmax with len = NUNITS simplification. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/vf_avl-4.c: New test.
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2024-01-06 10:08:55 +0800
committer: Pan Li <pan2.li@intel.com> 2024-01-06 10:12:43 +0800
commit: 9873f13d833b536b46cd6ff46d72e62407b048a8 (patch)
tree: 2954af305be21d4d5f75081bc4cba70261981af9
parent: 5a0b3355d956f5d36f9b562e027b890cc5f61d88 (diff)
download: gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.zip
gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.gz
gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.bz2
4 files changed, 39 insertions, 7 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0f0337c..00a5b64 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode);
 bool can_be_broadcasted_p (rtx);
 bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
+bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ec85964..2491522 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5117,4 +5117,27 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
   return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN;
 }
 
+/* Return true it is whole register-register move.  */
+bool
+whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
+{
+  /* An operation is a whole-register move if either
+     (1) Its vlmax operand equals VLMAX
+     (2) Its vl operand equals the number of units of its mode.  */
+  if (register_operand (ops[0], mode)
+      && register_operand (ops[3], mode)
+      && satisfies_constraint_vu (ops[2])
+      && satisfies_constraint_Wc1 (ops[1]))
+    {
+      if (INTVAL (ops[avl_type_index]) == VLMAX)
+	return true;
+      /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+	 into NON-VLMAX with LEN = NUNITS.  */
+      else if (CONST_INT_P (ops[4])
+	       && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
+	return true;
+    }
+  return false;
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3d2c1c3..be5beb5 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1724,10 +1724,7 @@
    vse<sew>.v\t%3,%0%p1
    vmv.v.v\t%0,%3
    vmv.v.v\t%0,%3"
-  "&& register_operand (operands[0], <MODE>mode)
-   && register_operand (operands[3], <MODE>mode)
-   && satisfies_constraint_vu (operands[2])
-   && INTVAL (operands[7]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1776,9 +1773,7 @@
    vmmv.m\t%0,%3
    vmclr.m\t%0
    vmset.m\t%0"
-  "&& register_operand (operands[0], <MODE>mode)
-   && register_operand (operands[3], <MODE>mode)
-   && INTVAL (operands[5]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
new file mode 100644
index 0000000..1b4bfd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "riscv_vector.h"
+void
+f (float x, float y, void *out)
+{
+  float f[4] = { x, x, x, y };
+  vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
+  __riscv_vse32_v_f32m1 (out, v, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>	2024-01-06 10:08:55 +0800
committer	Pan Li <pan2.li@intel.com>	2024-01-06 10:12:43 +0800
commit	9873f13d833b536b46cd6ff46d72e62407b048a8 (patch)
tree	2954af305be21d4d5f75081bc4cba70261981af9
parent	5a0b3355d956f5d36f9b562e027b890cc5f61d88 (diff)
download	gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.zip gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.gz gcc-9873f13d833b536b46cd6ff46d72e62407b048a8.tar.bz2