RISC-V: Support vfwmul.vv combine lowering

Consider the following complicated case: __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \ TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \ TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \ TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \ { \ for (int i = 0; i < n; i++) \ { \ dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \ dst2[i] = (TYPE1) a2[i] * (TYPE1) b[i]; \ dst3[i] = (TYPE1) a2[i] * (TYPE1) a[i]; \ dst4[i] = (TYPE1) a[i] * (TYPE1) b2[i]; \ } \ } TEST_TYPE (double, float) In such a complicated situation, the combine pass cannot combine the extensions of both operands in a single step. Instead, it first combines one of the extensions into the multiply, and then combines the other. The combine flow is as follows: Original IR: (set (reg 0) (float_extend: (reg 1))) (set (reg 3) (float_extend: (reg 2))) (set (reg 4) (mult: (reg 0) (reg 3))) First step of combine: (set (reg 3) (float_extend: (reg 2))) (set (reg 4) (mult: (float_extend: (reg 1)) (reg 3))) Second step of combine: (set (reg 4) (mult: (float_extend: (reg 1)) (float_extend: (reg 2)))) So, to enhance the combine optimization, we add a "pseudo vfwmul.wv" RTL pattern in autovec-opt.md, which is (set (reg 0) (mult (float_extend (reg 1)) (reg 2))). gcc/ChangeLog: * config/riscv/autovec-opt.md (@pred_single_widen_mul<any_extend:su><mode>): Change "@" into "*" in pattern name which simplifies build files. (*pred_single_widen_mul<any_extend:su><mode>): Ditto. (*pred_single_widen_mul<mode>): New pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/widen/widen-3.c: Add floating-point. * gcc.target/riscv/rvv/autovec/widen/widen-7.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run-7.c: Ditto. * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c: New test.
author: Juzhe-Zhong <juzhe.zhong@rivai.ai> 2023-06-28 12:15:12 +0800
committer: Lehua Ding <lehua.ding@rivai.ai> 2023-07-03 17:22:28 +0800
commit: bc32918b063b9fa3dffc8815478a81df6ad999ca (patch)
tree: 32d7a8b1f11de5be4ad9d080672c9f5c8ab5d249 /gcc
parent: 3755ad7514978e88809a7ad98c10592e4814a6ef (diff)
download: gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.zip
gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.tar.gz
gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.tar.bz2
8 files changed, 116 insertions, 10 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 80b85fa..a362812 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -406,6 +406,45 @@
   [(set_attr "type" "vimovvx")
    (set_attr "mode" "<MODE>")])
 
+;; RVV has no vfwmul.wv instruction (unlike vfwadd.wv).  This pattern is an
+;; intermediate "pseudo vfwmul.wv" RTL form that lets combine fold one
+;; float_extend at a time; if left uncombined it splits to extend + multiply.
+(define_insn_and_split "*pred_single_widen_mul<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand"                  "=&vr,  &vr")
+       (if_then_else:VWEXTF
+         (unspec:<VM>
+           [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              "   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "    i,    i")
+            (match_operand 7 "const_int_operand"                  "    i,    i")
+            (match_operand 8 "const_int_operand"                  "    i,    i")
+            (match_operand 9 "const_int_operand"                  "    i,    i")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)
+            (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+         (mult:VWEXTF
+           (float_extend:VWEXTF
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr,   vr"))
+           (match_operand:VWEXTF 3 "register_operand"             "   vr,   vr"))
+         (match_operand:VWEXTF 2 "vector_merge_operand"           "   vu,    0")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_extend (<MODE>mode);
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    rtx ops[] = {tmp, operands[4]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);
+
+    emit_insn (gen_pred (MULT, <MODE>mode, operands[0], operands[1], operands[2],
+                        operands[3], tmp, operands[5], operands[6],
+                        operands[7], operands[8], operands[9]));
+    DONE;
+  }
+  [(set_attr "type" "vfwmul")
+   (set_attr "mode" "<MODE>")])
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] VFWMACC
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
index 609a5c0..b2b1440 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -19,9 +19,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
index cc43d9b..3806e8b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -19,9 +19,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */
 /* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
index e1fd794..1515374 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -24,9 +24,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 \
   TEST_TYPE (uint32_t, uint16_t)                                               \
   TEST_TYPE (int64_t, int32_t)                                                 \
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               \
+  TEST_TYPE (float, _Float16)                                                  \
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 12 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
index beb0cc2..b7dd60f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <assert.h>
 #include "widen-3.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               \
   RUN (uint32_t, uint16_t, 65535)                                              \
   RUN (int64_t, int32_t, -2147483648)                                          \
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         \
+  RUN (double, float, -2147483648)
 
 int
 main ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
index 4abddd5..ab29f4a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <assert.h>
 #include "widen-7.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               \
   RUN (uint32_t, uint16_t, 65535)                                              \
   RUN (int64_t, int32_t, -2147483648)                                          \
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         \
+  RUN (double, float, -2147483648)
 
 int
 main ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
new file mode 100644
index 0000000..c3efd0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+/* Run check: dst == (float) a * (float) b for _Float16 a, b (see widen-3.c).  */
+#include <assert.h>
+#include "widen-3.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c
new file mode 100644
index 0000000..60e2401c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */
+/* Run check: dst == (float) a * b with _Float16 a and float b (see widen-7.c).  */
+#include <assert.h>
+#include "widen-7.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE1 b##TYPE1[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % LIMIT;                                         \
+      b##TYPE1[i] = LIMIT + i & LIMIT;                                         \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE1, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == (((TYPE1) a##TYPE2[i]) * b##TYPE1[i]));
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>	2023-06-28 12:15:12 +0800
committer	Lehua Ding <lehua.ding@rivai.ai>	2023-07-03 17:22:28 +0800
commit	bc32918b063b9fa3dffc8815478a81df6ad999ca (patch)
tree	32d7a8b1f11de5be4ad9d080672c9f5c8ab5d249 /gcc
parent	3755ad7514978e88809a7ad98c10592e4814a6ef (diff)
download	gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.zip gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.tar.gz gcc-bc32918b063b9fa3dffc8815478a81df6ad999ca.tar.bz2