RISC-V: Fixed failed rvv combine testcases

This patch fixed the fellowing failed testcases on the trunk: FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2 ... FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3 ... The reason for these failed testcases is the introduce of .VCOND_MASK_LEN in midend for other bugfix and further leads to a new vcond_mask_len rtl pattern after expand. So we need add new combine patterns handle this case. Consider this code: int16_t foo (int8_t *restrict a, int8_t *restrict pred) { int16_t sum = 0; for (int i = 0; i < 16; i += 1) if (pred[i]) sum += a[i]; return sum; } Before this patch: foo: vsetivli zero,16,e8,m1,ta,ma vle8.v v0,0(a1) vsetvli a5,zero,e8,m1,ta,ma vmsne.vi v0,v0,0 vsetvli zero,zero,e16,m2,ta,ma li a3,0 vmv.v.i v2,0 vsetivli zero,16,e16,m2,ta,ma vle8.v v6,0(a0),v0.t vmv.s.x v1,a3 vsetvli a5,zero,e16,m2,ta,ma vsext.vf2 v4,v6 vsetivli zero,16,e16,m2,tu,ma vmerge.vvm v2,v2,v4,v0 vsetvli a5,zero,e16,m2,ta,ma vredsum.vs v2,v2,v1 vmv.x.s a0,v2 slliw a0,a0,16 sraiw a0,a0,16 ret After this patch: foo: vsetivli zero,16,e16,m2,ta,ma li a5,0 vle8.v v0,0(a1) vmv.s.x v1,a5 vsetvli zero,zero,e8,m1,ta,ma vmsne.vi v0,v0,0 vle8.v v2,0(a0),v0.t vwredsum.vs v1,v2,v1,v0.t vsetvli zero,zero,e16,m1,ta,ma vmv.x.s a0,v1 slliw a0,a0,16 sraiw a0,a0,16 ret Combine the vsext.vf2, vmerge.vvm, and vredsum.vs instructions while reducing the corresponding vsetvl instructions. gcc/ChangeLog: * config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>): New combine pattern. (*cond_len_<optab><v_quad_trunc><mode>): Ditto. (*cond_len_<optab><v_oct_trunc><mode>): Ditto. (*cond_len_extend<v_double_trunc><mode>): Ditto. (*cond_len_widen_reduc_plus_scal_<mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:
author: Lehua Ding <lehua.ding@rivai.ai> 2023-11-07 15:33:20 +0800
committer: Lehua Ding <lehua.ding@rivai.ai> 2023-11-07 15:53:53 +0800
commit: 71b1efd910d47362558defc31b92fc4cc80c0ec6 (patch)
tree: 6bd42ac1e44e57ad1d14923704e339e557a6da10 /gcc
parent: 86c913ec92c1b8e871e061ff35db58c2f81780f4 (diff)
download: gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.zip
gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.tar.gz
gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.tar.bz2
3 files changed, 232 insertions, 25 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index d0f8b3c..3c87e66 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -194,6 +194,84 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (if_then_else:VWEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTI
+        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
+  [(set (match_operand:VQEXTI 0 "register_operand")
+    (if_then_else:VQEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VQEXTI
+        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2 "register_operand"))
+        (match_operand:VQEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
+(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
+  [(set (match_operand:VOEXTI 0 "register_operand")
+    (if_then_else:VOEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VOEXTI
+        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2 "register_operand"))
+        (match_operand:VOEXTI 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -235,6 +313,32 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine FP extend(vf2) and vcond_mask_len
+(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
+  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
+    (if_then_else:VWEXTF_ZVFHMIN
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTF_ZVFHMIN
+        (float_extend:VWEXTF_ZVFHMIN (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
+	(match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_extend<mode> (operands[0], operands[3], operands[1], operands[2],
+                                    operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine FP trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -1151,6 +1255,61 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the mrege of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (any_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine mask_extend + vfredsum to mask_vfwredusum
 ;; where the mrege of mask_extend is vector const 0
 (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
@@ -1187,6 +1346,61 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the mrege of mask_len_extend is vector const 0
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (float_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VF_HS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
index 22a7104..47889f3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -15,16 +15,27 @@
 
 #define TEST_ALL(TEST)                                                         \
   TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int8_t, 8)                                                    \
   TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int8_t, 4)                                                    \
+  TEST (int64_t, int16_t, 4)                                                   \
   TEST (int64_t, int32_t, 4)                                                   \
   TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint8_t, 8)                                                  \
   TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint8_t, 4)                                                  \
+  TEST (uint64_t, uint16_t, 4)                                                 \
   TEST (uint64_t, uint32_t, 4)                                                 \
   TEST (float, _Float16, 8)                                                    \
+  TEST (double, _Float16, 4)                                                   \
   TEST (double, float, 4)
 
 TEST_ALL (TEST_TYPE)
 
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
index 7c8fedd..662d135 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c
@@ -1,30 +1,12 @@
 /* { dg-do compile } */
 /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
-#include <stdint-gcc.h>
 
-#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
-  __attribute__ ((noipa))                                                      \
-  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
-  {                                                                            \
-    TYPE1 sum = 0;                                                             \
-    for (int i = 0; i < N; i += 1)                                             \
-      if (pred[i])                                                             \
-	sum += a[i];                                                           \
-    return sum;                                                                \
-  }
+#include "cond_widen_reduc-1.c"
 
-#define TEST_ALL(TEST)                                                         \
-  TEST (int16_t, int8_t, 16)                                                   \
-  TEST (int32_t, int16_t, 8)                                                   \
-  TEST (int64_t, int32_t, 4)                                                   \
-  TEST (uint16_t, uint8_t, 16)                                                 \
-  TEST (uint32_t, uint16_t, 8)                                                 \
-  TEST (uint64_t, uint32_t, 4)                                                 \
-  TEST (float, _Float16, 8)                                                    \
-  TEST (double, float, 4)
-
-TEST_ALL (TEST_TYPE)
-
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
author	Lehua Ding <lehua.ding@rivai.ai>	2023-11-07 15:33:20 +0800
committer	Lehua Ding <lehua.ding@rivai.ai>	2023-11-07 15:53:53 +0800
commit	71b1efd910d47362558defc31b92fc4cc80c0ec6 (patch)
tree	6bd42ac1e44e57ad1d14923704e339e557a6da10 /gcc
parent	86c913ec92c1b8e871e061ff35db58c2f81780f4 (diff)
download	gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.zip gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.tar.gz gcc-71b1efd910d47362558defc31b92fc4cc80c0ec6.tar.bz2