diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-06-28 16:39:06 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-06-28 16:47:36 +0800 |
commit | b7ab876fa96ce3b48120c14f327c1e199356e955 (patch) | |
tree | 8ae7f403ccfc29bdfccad4fd67be994d8f9b11ad /gcc | |
parent | f3d87219dd502d5c11608ffb83fbe66c79baf784 (diff) | |
download | gcc-b7ab876fa96ce3b48120c14f327c1e199356e955.zip gcc-b7ab876fa96ce3b48120c14f327c1e199356e955.tar.gz gcc-b7ab876fa96ce3b48120c14f327c1e199356e955.tar.bz2 |
RISC-V: Support vfwmacc combine lowering
This patch adds the following combine patterns:
1. (set (reg) (fma (float_extend:reg)(float_extend:reg)(reg)))
This pattern allows combine to fold: vfwcvt + vfwcvt + vfmacc ==> vfwmacc.
2. (set (reg) (fma (float_extend:reg)(reg)(reg)))
This pattern is an intermediate form that helps the combine pass.
In complicated situations, the combine pass cannot fold the extensions of both
multiplication operands in a single step. It first combines one extension,
producing (set (reg) (fma (float_extend:reg)(reg)(reg))), and then combines the
extension of the other operand in a second step.
This improves combine optimization for cases such as the following:
#define TEST_TYPE(TYPE1, TYPE2) \
__attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 ( \
TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3, \
TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b, \
TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n) \
{ \
for (int i = 0; i < n; i++) \
{ \
dst[i] += (TYPE1) a[i] * (TYPE1) b[i]; \
dst2[i] += (TYPE1) a2[i] * (TYPE1) b[i]; \
dst3[i] += (TYPE1) a2[i] * (TYPE1) a[i]; \
dst4[i] += (TYPE1) a[i] * (TYPE1) b2[i]; \
} \
}
#define TEST_ALL() \
TEST_TYPE (int16_t, int8_t) \
TEST_TYPE (uint16_t, uint8_t) \
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
TEST_TYPE (uint64_t, uint32_t) \
TEST_TYPE (float, _Float16) \
TEST_TYPE (double, float)
TEST_ALL ()
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*double_widen_fma<mode>): New pattern.
(*single_widen_fma<mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/widen/widen-8.c: Add floating-point.
* gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen_run-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c: New test.
Diffstat (limited to 'gcc')
5 files changed, 103 insertions, 6 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 2804080..80b85fa 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -405,3 +405,61 @@ "vmv.x.s\t%0,%1" [(set_attr "type" "vimovvx") (set_attr "mode" "<MODE>")]) + +;; ------------------------------------------------------------------------- +;; ---- [FP] VFWMACC +;; ------------------------------------------------------------------------- +;; Includes: +;; - vfwmacc.vv +;; ------------------------------------------------------------------------- + +;; Combine ext + ext + fma ===> widen fma. +;; Most of circumstantces, LoopVectorizer will generate the following IR: +;; vect__8.176_40 = (vector([2,2]) double) vect__7.175_41; +;; vect__11.180_35 = (vector([2,2]) double) vect__10.179_36; +;; vect__13.182_33 = .FMA (vect__11.180_35, vect__8.176_40, vect__4.172_45); +(define_insn_and_split "*double_widen_fma<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (fma:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")) + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")) + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_fp_ternary_insn (code_for_pred_widen_mul (PLUS, <MODE>mode), + riscv_vector::RVV_WIDEN_TERNOP, operands); + DONE; + } + [(set_attr "type" "vfwmuladd") + (set_attr "mode" "<V_DOUBLE_TRUNC>")]) + +;; This helps to match ext + fma. 
+(define_insn_and_split "*single_widen_fma<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (fma:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")) + (match_operand:VWEXTF 3 "register_operand") + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_extend (<MODE>mode); + rtx tmp = gen_reg_rtx (<MODE>mode); + rtx ext_ops[] = {tmp, operands[2]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops); + + rtx dst = expand_ternary_op (<MODE>mode, fma_optab, tmp, operands[3], + operands[1], operands[0], 0); + emit_move_insn (operands[0], dst); + DONE; + } + [(set_attr "type" "vfwmuladd") + (set_attr "mode" "<V_DOUBLE_TRUNC>")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c index f3ca07c..8f41bdf 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */ #include <stdint-gcc.h> @@ -19,9 +19,12 @@ TEST_TYPE (int32_t, int16_t) \ TEST_TYPE (uint32_t, uint16_t) \ TEST_TYPE (int64_t, int32_t) \ - TEST_TYPE (uint64_t, uint32_t) + TEST_TYPE (uint64_t, uint32_t) \ + TEST_TYPE (float, _Float16) \ + TEST_TYPE (double, float) TEST_ALL () /* { dg-final { scan-assembler-times {\tvwmacc\.vv} 3 } } */ /* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 3 } } */ +/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c index 
187b6db..3ff8483 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */ #include <stdint-gcc.h> @@ -24,9 +24,12 @@ TEST_TYPE (int32_t, int16_t) \ TEST_TYPE (uint32_t, uint16_t) \ TEST_TYPE (int64_t, int32_t) \ - TEST_TYPE (uint64_t, uint32_t) + TEST_TYPE (uint64_t, uint32_t) \ + TEST_TYPE (float, _Float16) \ + TEST_TYPE (double, float) TEST_ALL () /* { dg-final { scan-assembler-times {\tvwmacc\.vv} 12 } } */ /* { dg-final { scan-assembler-times {\tvwmaccu\.vv} 12 } } */ +/* { dg-final { scan-assembler-times {\tvfwmacc\.vv} 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c index f4840d3..1509500 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-8.c @@ -1,5 +1,5 @@ /* { dg-do run { target { riscv_vector } } } */ -/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ #include <assert.h> #include "widen-8.c" @@ -29,7 +29,8 @@ RUN (int32_t, int16_t, -32768) \ RUN (uint32_t, uint16_t, 65535) \ RUN (int64_t, int32_t, -2147483648) \ - RUN (uint64_t, uint32_t, 4294967295) + RUN (uint64_t, uint32_t, 4294967295) \ + RUN (double, float, -2147483648) int main () diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c new file mode 100644 index 0000000..63563b8 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-8.c @@ -0,0 +1,32 @@ +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -ffast-math" } */ + +#include <assert.h> +#include "widen-8.c" + +#define SZ 512 + +#define RUN(TYPE1, TYPE2, LIMIT) \ + TYPE2 a##TYPE2[SZ]; \ + TYPE2 b##TYPE2[SZ]; \ + TYPE1 dst##TYPE1[SZ]; \ + TYPE1 dst2##TYPE1[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE2[i] = LIMIT + i % 8723; \ + b##TYPE2[i] = LIMIT + i & 1964; \ + dst##TYPE1[i] = LIMIT + i & 628; \ + dst2##TYPE1[i] = LIMIT + i & 628; \ + } \ + vwmacc_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (dst##TYPE1[i] \ + == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]) + dst2##TYPE1[i]); + +#define RUN_ALL() RUN (float, _Float16, -32768) + +int +main () +{ + RUN_ALL () +} |