RISC-V: Support widening register overlap for vf4/vf8

size_t foo (char const *buf, size_t len) { size_t sum = 0; size_t vl = __riscv_vsetvlmax_e8m8 (); size_t step = vl * 4; const char *it = buf, *end = buf + len; for (; it + step <= end;) { vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl); it += vl; vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl); it += vl; vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl); it += vl; vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl); it += vl; asm volatile("nop" ::: "memory"); vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl); vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl); vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl); vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl); asm volatile("nop" ::: "memory"); size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0); size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1); size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2); size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3); sum += sumation (sum0, sum1, sum2, sum3); } return sum; } Before this patch: add a3,s0,s1 add a4,s6,s1 add a5,s7,s1 vsetvli zero,s0,e64,m8,ta,ma vle8.v v4,0(s1) vle8.v v3,0(a3) mv s1,s2 vle8.v v2,0(a4) vle8.v v1,0(a5) nop vsext.vf8 v8,v4 vsext.vf8 v16,v2 vs8r.v v8,0(sp) vsext.vf8 v24,v1 vsext.vf8 v8,v3 nop vmv.x.s a1,v8 vl8re64.v v8,0(sp) vmv.x.s a3,v24 vmv.x.s a2,v16 vmv.x.s a0,v8 add s2,s2,s5 call sumation add s3,s3,a0 bgeu s4,s2,.L5 After this patch: add a3,s0,s1 add a4,s6,s1 add a5,s7,s1 vsetvli zero,s0,e64,m8,ta,ma vle8.v v15,0(s1) vle8.v v23,0(a3) mv s1,s2 vle8.v v31,0(a4) vle8.v v7,0(a5) vsext.vf8 v8,v15 vsext.vf8 v16,v23 vsext.vf8 v24,v31 vsext.vf8 v0,v7 vmv.x.s a3,v0 vmv.x.s a2,v24 vmv.x.s a1,v16 vmv.x.s a0,v8 add s2,s2,s5 call sumation add s3,s3,a0 bgeu s4,s2,.L5 PR target/112431 gcc/ChangeLog: * config/riscv/vector.md: Add widening overlap of vf2/vf4. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-16.c: New test. * gcc.target/riscv/rvv/base/pr112431-17.c: New test. * gcc.target/riscv/rvv/base/pr112431-18.c: New test.
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-11-30 20:08:43 +0800
committer: Pan Li <pan2.li@intel.com> 2023-11-30 20:11:24 +0800
commit: 303195e2a6b6f0e8f42e0578b61f9f37c6250beb (patch)
tree: bb95154dad098bffbbbd2592458829501240bb54 /gcc
parent: 5a35152f87a36db480693884dfb27ff6a5d5d683 (diff)
download: gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.zip
gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.tar.gz
gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.tar.bz2
4 files changed, 190 insertions, 18 deletions
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index b5667b9..b47b974 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3704,43 +3704,45 @@
 
 ;; Vector Quad-Widening Sign-extend and Zero-extend.
 (define_insn "@pred_<optab><mode>_vf4"
-  [(set (match_operand:VQEXTI 0 "register_operand"          "=&vr,&vr")
+  [(set (match_operand:VQEXTI 0 "register_operand"               "=vr,   vr,   vr,   vr, ?&vr, ?&vr")
 	(if_then_else:VQEXTI
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"          "   rK,   rK")
-	     (match_operand 5 "const_int_operand"              "    i,    i")
-	     (match_operand 6 "const_int_operand"              "    i,    i")
-	     (match_operand 7 "const_int_operand"              "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"          "   rK,   rK,   rK,   rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
+	     (match_operand 6 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
+	     (match_operand 7 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_extend:VQEXTI
-	    (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "   vr,   vr"))
-	  (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0")))]
+	    (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "  W43,  W43,  W86,  W86,   vr,   vr"))
+	  (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "v<sz>ext.vf4\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
 
 ;; Vector Oct-Widening Sign-extend and Zero-extend.
 (define_insn "@pred_<optab><mode>_vf8"
-  [(set (match_operand:VOEXTI 0 "register_operand"         "=&vr,&vr")
+  [(set (match_operand:VOEXTI 0 "register_operand"              "=vr,   vr, ?&vr, ?&vr")
 	(if_then_else:VOEXTI
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 5 "const_int_operand"             "    i,    i")
-	     (match_operand 6 "const_int_operand"             "    i,    i")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 6 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_extend:VOEXTI
-	    (match_operand:<V_OCT_TRUNC> 3 "register_operand" "   vr,   vr"))
-	  (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0")))]
+	    (match_operand:<V_OCT_TRUNC> 3 "register_operand" "  W87,  W87,   vr,   vr"))
+	  (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "v<sz>ext.vf8\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "W87,W87,none,none")])
 
 ;; Vector Widening Add/Subtract/Multiply.
 (define_insn "@pred_dual_widen_<any_widen_binop:optab><any_extend:su><mode>"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 0000000..98f4245
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+	  size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      
+      asm volatile("nop" ::: "memory");
+      vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+      vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+      vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+      vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+      vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+      vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+      vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+      vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 0000000..9b60005
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
+      vint32m8_t vw1 = __riscv_vsext_vf4_i32m8 (v1, vl);
+      vint32m8_t vw2 = __riscv_vsext_vf4_i32m8 (v2, vl);
+      vint32m8_t vw3 = __riscv_vsext_vf4_i32m8 (v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
new file mode 100644
index 0000000..dd65b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      
+      asm volatile("nop" ::: "memory");
+      vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
+      vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
+      vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
+      vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>	2023-11-30 20:08:43 +0800
committer	Pan Li <pan2.li@intel.com>	2023-11-30 20:11:24 +0800
commit	303195e2a6b6f0e8f42e0578b61f9f37c6250beb (patch)
tree	bb95154dad098bffbbbd2592458829501240bb54 /gcc
parent	5a35152f87a36db480693884dfb27ff6a5d5d683 (diff)
download	gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.zip gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.tar.gz gcc-303195e2a6b6f0e8f42e0578b61f9f37c6250beb.tar.bz2