aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-07-14 06:17:09 +0800
committerPan Li <pan2.li@intel.com>2023-07-14 20:53:30 +0800
commit0d2673e995f0dd69f406a34d2e87d2a25cf3c285 (patch)
treea3e7dd7baefe5a74f5182941f9d59fc758fb1c7e /gcc
parent53d12ecd624ec901d8449cfa1917f6f90e910927 (diff)
downloadgcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.zip
gcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.tar.gz
gcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.tar.bz2
RISC-V: Enable COND_LEN_FMA auto-vectorization
Add comments per Robin's suggestion in scatter_store_run-7.c. Enable COND_LEN_FMA auto-vectorization for floating-point FMA **without** -ffast-math. The middle-end support has been approved, and I will merge it after I finish bootstrap && regression testing on x86. https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624395.html Now it's time to send this patch. Consider the following case: __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst, \ TYPE *__restrict a, \ TYPE *__restrict b, int n) \ { \ for (int i = 0; i < n; i++) \ dst[i] += a[i] * b[i]; \ } TEST_ALL () Before this patch: ternop_double: ble a3,zero,.L5 mv a6,a0 .L3: vsetvli a5,a3,e64,m1,tu,ma slli a4,a5,3 vle64.v v1,0(a0) vle64.v v2,0(a1) vle64.v v3,0(a2) sub a3,a3,a5 vfmul.vv v2,v2,v3 vfadd.vv v1,v1,v2 vse64.v v1,0(a6) add a0,a0,a4 add a1,a1,a4 add a2,a2,a4 add a6,a6,a4 bne a3,zero,.L3 .L5: ret After this patch: ternop_double: ble a3,zero,.L5 mv a6,a0 .L3: vsetvli a5,a3,e64,m1,tu,ma slli a4,a5,3 vle64.v v1,0(a0) vle64.v v2,0(a1) vle64.v v3,0(a2) sub a3,a3,a5 vfmacc.vv v1,v3,v2 vse64.v v1,0(a6) add a0,a0,a4 add a1,a1,a4 add a2,a2,a4 add a6,a6,a4 bne a3,zero,.L3 .L5: ret Note: This patch only supports COND_LEN_FMA, **not** COND_LEN_FNMA, etc., since they are not yet supported in the middle-end. They will be supported in follow-up patches soon. gcc/ChangeLog: * config/riscv/autovec.md (cond_len_fma<mode>): New pattern. * config/riscv/riscv-protos.h (enum insn_type): New enum. (expand_cond_len_ternop): New function. * config/riscv/riscv-v.cc (emit_nonvlmax_fp_ternary_tu_insn): Ditto. (expand_cond_len_ternop): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c: Adapt testcase for link fail. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: New test. 
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/autovec.md23
-rw-r--r--gcc/config/riscv/riscv-protos.h2
-rw-r--r--gcc/config/riscv/riscv-v.cc49
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c6
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c7
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c11
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c9
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c4
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c4
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c4
10 files changed, 118 insertions, 1 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 0476b1d..64a41bd 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1531,3 +1531,26 @@
riscv_vector::expand_cond_len_binop (<CODE>, operands);
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Conditional ternary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfmacc/...
+;; -------------------------------------------------------------------------
+
+(define_expand "cond_len_fma<mode>" ;; Expander for cond_len_fma on the VF (FP vector) modes.
+ [(match_operand:VF 0 "register_operand") ;; 0: destination (read as ops[0] by expand_cond_len_ternop).
+ (match_operand:<VM> 1 "vector_mask_operand") ;; 1: mask, in the mask mode <VM> (read as ops[1]).
+ (match_operand:VF 2 "register_operand") ;; 2: presumably an FMA input -- confirm against the optab documentation.
+ (match_operand:VF 3 "register_operand") ;; 3: presumably an FMA input -- confirm.
+ (match_operand:VF 4 "register_operand") ;; 4: presumably an FMA input -- confirm.
+ (match_operand:VF 5 "register_operand") ;; 5: presumably the else/merge value -- confirm.
+ (match_operand 6 "autovec_length_operand") ;; 6: length (read as ops[6] and forwarded as VL).
+ (match_operand 7 "const_0_operand")] ;; 7: must be constant zero; presumably the bias -- confirm.
+ "TARGET_VECTOR" ;; Only enabled when the vector extension is available.
+{
+ insn_code icode = code_for_pred_mul (PLUS, <MODE>mode); /* Insn code of the pred_mul pattern with PLUS for this mode. */
+ riscv_vector::expand_cond_len_ternop (icode, operands); /* The helper emits the insn from the full operand array. */
+ DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1a622c5..f91c2d5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -191,6 +191,7 @@ enum insn_type
RVV_UNOP_MU = RVV_UNOP + 2, /* Likewise. */
RVV_UNOP_M = RVV_UNOP + 2, /* Likewise. */
RVV_TERNOP = 5,
+ RVV_TERNOP_TU = RVV_TERNOP + 1,
RVV_WIDEN_TERNOP = 4,
RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md. */
RVV_SLIDE_OP = 4, /* Dest, VUNDEF, source and offset. */
@@ -306,6 +307,7 @@ void expand_vec_perm (rtx, rtx, rtx, rtx);
void expand_select_vl (rtx *);
void expand_load_store (rtx *, bool);
void expand_gather_scatter (rtx *, bool);
+void expand_cond_len_ternop (unsigned, rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 90da638..c3fd4a1 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -748,6 +748,28 @@ emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
e.emit_insn ((enum insn_code) icode, ops);
}
+/* Emit the floating-point ternary insn ICODE with the OP_NUM operands in OPS and length VL, using a
+ * {NONVLMAX, TAIL_UNDISTURBED, MASK_ANY} vsetvli; the operation always has a real merge operand. */
+static void
+emit_nonvlmax_fp_ternary_tu_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+ machine_mode dest_mode = GET_MODE (ops[0]); /* Mode of the destination operand. */
+ machine_mode mask_mode = get_mask_mode (dest_mode).require (); /* Mask mode for DEST_MODE; require () presumably asserts that one exists. */
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
+ /*HAS_DEST_P*/ true,
+ /*FULLY_UNMASKED_P*/ false,
+ /*USE_REAL_MERGE_P*/ true,
+ /*HAS_AVL_P*/ true,
+ /*VLMAX_P*/ false,
+ /*DEST_MODE*/ dest_mode,
+ /*MASK_MODE*/ mask_mode);
+ e.set_policy (TAIL_UNDISTURBED); /* Tail policy: undisturbed. */
+ e.set_policy (MASK_ANY); /* Mask policy: any. */
+ e.set_rounding_mode (FRM_DYN); /* Use the dynamic FP rounding mode. */
+ e.set_vl (vl); /* Explicit length, since VLMAX_P is false above. */
+ e.emit_insn ((enum insn_code) icode, ops); /* ICODE arrives as unsigned, hence the cast. */
+}
+
/* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
* actual operation. */
void
@@ -3267,4 +3289,31 @@ expand_gather_scatter (rtx *ops, bool is_load)
}
}
+/* Expand a COND_LEN_* ternary operation using insn ICODE; OPS are the expander operands (0: dest, 1: mask, 6: length). */
+void
+expand_cond_len_ternop (unsigned icode, rtx *ops)
+{
+ rtx dest = ops[0]; /* Destination vector. */
+ rtx mask = ops[1]; /* Mask operand. */
+ rtx len = ops[6]; /* Length, passed on below as the VL argument. */
+ machine_mode mode = GET_MODE (dest);
+ machine_mode mask_mode = GET_MODE (mask);
+
+ poly_uint64 value; /* NOTE(review): not referenced anywhere in this function as shown. */
+ bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode)); /* True when MASK is the constant -1 of its mode. */
+
+ if (is_dummy_mask)
+ {
+ /* Use the TU (tail-undisturbed), MASK_ANY policy. */
+ if (FLOAT_MODE_P (mode))
+ emit_nonvlmax_fp_ternary_tu_insn (icode, RVV_TERNOP_TU, ops, len); /* RVV_TERNOP_TU is the operand count. */
+ else
+ /* FIXME: Enable this case (non-float modes) when we support it in the middle-end. */
+ gcc_unreachable ();
+ }
+ else
+ /* FIXME: Enable this case (a real, non-constant -1 mask) when we support it in the middle-end. */
+ gcc_unreachable ();
+}
+
} // namespace riscv_vector
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c
index a244011..6f7316e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c
@@ -1,5 +1,9 @@
/* { dg-do run { target { riscv_vector } } } */
-
+/* For some reason we exceed
+ the default code model's +-2 GiB limits. We should investigate why and
+ add a proper description here. For now just make sure the test case
+ compiles properly. */
+/* { dg-additional-options "-mcmodel=medany" } */
#include "scatter_store-7.c"
#include <assert.h>
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c
new file mode 100644
index 0000000..d608504
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "ternop-1.c"
+
+/* { dg-final { scan-assembler-not {\tvmv} } } */
+/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 3 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c
new file mode 100644
index 0000000..cb60540
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "ternop-2.c"
+
+/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 9 } } */
+/* TODO: we don't have an "undefined" IR representation for COND_LEN_* operations,
+ which produces redundant move instructions here.
+ An assembler-not check for 'vmv' instructions will be added in the future. */
+/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 9 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c
new file mode 100644
index 0000000..63cd4ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
+
+#include "ternop-3.c"
+
+/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmv} 11 } } */
+/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 6 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c
new file mode 100644
index 0000000..446d216
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include "ternop_run-1.c"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c
new file mode 100644
index 0000000..55ee829
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include "ternop_run-2.c"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c
new file mode 100644
index 0000000..31aab4c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include "ternop_run-3.c"