diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-07-14 06:17:09 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-07-14 20:53:30 +0800 |
commit | 0d2673e995f0dd69f406a34d2e87d2a25cf3c285 (patch) | |
tree | a3e7dd7baefe5a74f5182941f9d59fc758fb1c7e /gcc | |
parent | 53d12ecd624ec901d8449cfa1917f6f90e910927 (diff) | |
download | gcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.zip gcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.tar.gz gcc-0d2673e995f0dd69f406a34d2e87d2a25cf3c285.tar.bz2 |
RISC-V: Enable COND_LEN_FMA auto-vectorization
Add comments in scatter_store_run-7.c as suggested by Robin.
Enable COND_LEN_FMA auto-vectorization for floating-point FMA **without** -ffast-math.
The middle-end support has been approved, and I will merge it after I finish bootstrap and regression testing on x86.
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624395.html
Now, it's time to send this patch.
Consider the following case:
__attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst, \
TYPE *__restrict a, \
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] += a[i] * b[i]; \
}
TEST_ALL ()
Before this patch:
ternop_double:
ble a3,zero,.L5
mv a6,a0
.L3:
vsetvli a5,a3,e64,m1,tu,ma
slli a4,a5,3
vle64.v v1,0(a0)
vle64.v v2,0(a1)
vle64.v v3,0(a2)
sub a3,a3,a5
vfmul.vv v2,v2,v3
vfadd.vv v1,v1,v2
vse64.v v1,0(a6)
add a0,a0,a4
add a1,a1,a4
add a2,a2,a4
add a6,a6,a4
bne a3,zero,.L3
.L5:
ret
After this patch:
ternop_double:
ble a3,zero,.L5
mv a6,a0
.L3:
vsetvli a5,a3,e64,m1,tu,ma
slli a4,a5,3
vle64.v v1,0(a0)
vle64.v v2,0(a1)
vle64.v v3,0(a2)
sub a3,a3,a5
vfmacc.vv v1,v3,v2
vse64.v v1,0(a6)
add a0,a0,a4
add a1,a1,a4
add a2,a2,a4
add a6,a6,a4
bne a3,zero,.L3
.L5:
ret
Note: This patch only supports COND_LEN_FMA, **not** COND_LEN_FNMA etc., since they are not yet
supported in the middle-end.
Will support them in the following patches soon.
gcc/ChangeLog:
* config/riscv/autovec.md (cond_len_fma<mode>): New pattern.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(expand_cond_len_ternop): New function.
* config/riscv/riscv-v.cc (emit_nonvlmax_fp_ternary_tu_insn): Ditto.
(expand_cond_len_ternop): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c:
Adapt testcase for link fail.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c: New test.
Diffstat (limited to 'gcc')
10 files changed, 118 insertions, 1 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 0476b1d..64a41bd 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1531,3 +1531,26 @@ riscv_vector::expand_cond_len_binop (<CODE>, operands); DONE; }) + +;; ------------------------------------------------------------------------- +;; ---- [FP] Conditional ternary operations +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfmacc/... +;; ------------------------------------------------------------------------- + +(define_expand "cond_len_fma<mode>" + [(match_operand:VF 0 "register_operand") + (match_operand:<VM> 1 "vector_mask_operand") + (match_operand:VF 2 "register_operand") + (match_operand:VF 3 "register_operand") + (match_operand:VF 4 "register_operand") + (match_operand:VF 5 "register_operand") + (match_operand 6 "autovec_length_operand") + (match_operand 7 "const_0_operand")] + "TARGET_VECTOR" +{ + insn_code icode = code_for_pred_mul (PLUS, <MODE>mode); + riscv_vector::expand_cond_len_ternop (icode, operands); + DONE; +}) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 1a622c5..f91c2d5 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -191,6 +191,7 @@ enum insn_type RVV_UNOP_MU = RVV_UNOP + 2, /* Likewise. */ RVV_UNOP_M = RVV_UNOP + 2, /* Likewise. */ RVV_TERNOP = 5, + RVV_TERNOP_TU = RVV_TERNOP + 1, RVV_WIDEN_TERNOP = 4, RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md. */ RVV_SLIDE_OP = 4, /* Dest, VUNDEF, source and offset. */ @@ -306,6 +307,7 @@ void expand_vec_perm (rtx, rtx, rtx, rtx); void expand_select_vl (rtx *); void expand_load_store (rtx *, bool); void expand_gather_scatter (rtx *, bool); +void expand_cond_len_ternop (unsigned, rtx *); /* Rounding mode bitfield for fixed point VXRM. 
*/ enum fixed_point_rounding_mode diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 90da638..c3fd4a1 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -748,6 +748,28 @@ emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl) e.emit_insn ((enum insn_code) icode, ops); } +/* This function emits a {NONVLMAX, TAIL_UNDISTURBED, MASK_ANY} vsetvli followed + * by the ternary operation which always has a real merge operand. */ +static void +emit_nonvlmax_fp_ternary_tu_insn (unsigned icode, int op_num, rtx *ops, rtx vl) +{ + machine_mode dest_mode = GET_MODE (ops[0]); + machine_mode mask_mode = get_mask_mode (dest_mode).require (); + insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num, + /*HAS_DEST_P*/ true, + /*FULLY_UNMASKED_P*/ false, + /*USE_REAL_MERGE_P*/ true, + /*HAS_AVL_P*/ true, + /*VLMAX_P*/ false, + /*DEST_MODE*/ dest_mode, + /*MASK_MODE*/ mask_mode); + e.set_policy (TAIL_UNDISTURBED); + e.set_policy (MASK_ANY); + e.set_rounding_mode (FRM_DYN); + e.set_vl (vl); + e.emit_insn ((enum insn_code) icode, ops); +} + /* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the * actual operation. */ void @@ -3267,4 +3289,31 @@ expand_gather_scatter (rtx *ops, bool is_load) } } +/* Expand COND_LEN_*. */ +void +expand_cond_len_ternop (unsigned icode, rtx *ops) +{ + rtx dest = ops[0]; + rtx mask = ops[1]; + rtx len = ops[6]; + machine_mode mode = GET_MODE (dest); + machine_mode mask_mode = GET_MODE (mask); + + poly_uint64 value; + bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode)); + + if (is_dummy_mask) + { + /* Use TU, MASK ANY policy. */ + if (FLOAT_MODE_P (mode)) + emit_nonvlmax_fp_ternary_tu_insn (icode, RVV_TERNOP_TU, ops, len); + else + /* FIXME: Enable this case when we support it in the middle-end. */ + gcc_unreachable (); + } + else + /* FIXME: Enable this case when we support it in the middle-end. 
*/ + gcc_unreachable (); +} + } // namespace riscv_vector diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c index a244011..6f7316e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c @@ -1,5 +1,9 @@ /* { dg-do run { target { riscv_vector } } } */ - +/* For some reason we exceed + the default code model's +-2 GiB limits. We should investigate why and + add a proper description here. For now just make sure the test case + compiles properly. */ +/* { dg-additional-options "-mcmodel=medany" } */ #include "scatter_store-7.c" #include <assert.h> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c new file mode 100644 index 0000000..d608504 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */ + +#include "ternop-1.c" + +/* { dg-final { scan-assembler-not {\tvmv} } } */ +/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 3 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c new file mode 100644 index 0000000..cb60540 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */ + +#include "ternop-2.c" + +/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */ +/* { dg-final { scan-assembler-times 
{\tvfmacc\.vv} 9 } } */ +/* TODO: we don't have undefine IR for COND_LEN_* operations, + which will produce redundant move instructions here. + Will add assembler-not check of 'vmv' instructions in the future. */ +/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 9 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c new file mode 100644 index 0000000..63cd4ae --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */ + +#include "ternop-3.c" + +/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */ +/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvmv} 11 } } */ +/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 6 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c new file mode 100644 index 0000000..446d216 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c @@ -0,0 +1,4 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include "ternop_run-1.c" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c new file mode 100644 index 0000000..55ee829 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c @@ -0,0 +1,4 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include "ternop_run-2.c" diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c new file mode 100644 index 0000000..31aab4c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c @@ -0,0 +1,4 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ + +#include "ternop_run-3.c" |