diff options
author | Pan Li <pan2.li@intel.com> | 2023-09-28 13:51:07 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-09-28 22:22:30 +0800 |
commit | 88d8829e4f435bfc844db5a9df730e20faf7c2c7 (patch) | |
tree | 479cb90bcb3e78229c7b20b3b73d53a55d14b370 /gcc | |
parent | 0c8ecbcd3cf7d7187d2017ad02b663a57123b417 (diff) | |
download | gcc-88d8829e4f435bfc844db5a9df730e20faf7c2c7.zip gcc-88d8829e4f435bfc844db5a9df730e20faf7c2c7.tar.gz gcc-88d8829e4f435bfc844db5a9df730e20faf7c2c7.tar.bz2 |
RISC-V: Support {U}INT64 to FP16 auto-vectorization
Update in v2:
* Add math trap check.
* Adjust some test cases.
Original logs:
This patch would like to support the auto-vectorization from
the INT64 to FP16. We take below steps for the conversion.
* INT64 to FP32.
* FP32 to FP16.
Given sample code as below:
void
test_func (int64_t * __restrict a, _Float16 *b, unsigned n)
{
for (unsigned i = 0; i < n; i++)
b[i] = (_Float16) (a[i]);
}
Before this patch:
test.c:6:26: missed: couldn't vectorize loop
test.c:6:26: missed: not vectorized: unsupported data-type
ld a0,0(s0)
call __floatdihf
fsh fa0,0(s1)
addi s0,s0,8
addi s1,s1,2
bne s2,s0,.L3
ld ra,24(sp)
ld s0,16(sp)
ld s1,8(sp)
ld s2,0(sp)
addi sp,sp,32
After this patch:
vsetvli a5,a2,e8,mf8,ta,ma
vle64.v v1,0(a0)
vsetvli a4,zero,e32,mf2,ta,ma
vfncvt.f.x.w v1,v1
vsetvli zero,zero,e16,mf4,ta,ma
vfncvt.f.f.w v1,v1
vsetvli zero,a2,e16,mf4,ta,ma
vse16.v v1,0(a1)
Please note VLS mode is also involved in this patch and covered by the
test cases.
PR target/111506
gcc/ChangeLog:
* config/riscv/autovec.md (<float_cvt><mode><vnnconvert>2):
New pattern.
* config/riscv/vector-iterators.md: New iterator.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/unop/cvt-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/cvt-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cvt-0.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/riscv/autovec.md | 24 | ||||
-rw-r--r-- | gcc/config/riscv/vector-iterators.md | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c | 47 |
5 files changed, 152 insertions, 0 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index cd0cbdd..d6cf376 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -974,6 +974,30 @@ } [(set_attr "type" "vfncvtitof")]) +;; This operation can be performed in the loop vectorizer but unfortunately +;; not applicable for now. We can remove this pattern after loop vectorizer +;; is able to take care of INT64 to FP16 conversion. +(define_insn_and_split "<float_cvt><mode><vnnconvert>2" + [(set (match_operand:<VNNCONVERT> 0 "register_operand") + (any_float:<VNNCONVERT> + (match_operand:VWWCONVERTI 1 "register_operand")))] + "TARGET_VECTOR && TARGET_ZVFH && can_create_pseudo_p () && !flag_trapping_math" + "#" + "&& 1" + [(const_int 0)] + { + rtx single = gen_reg_rtx (<VNCONVERT>mode); /* Get vector SF mode. */ + + /* Step-1, INT64 => FP32. */ + emit_insn (gen_<float_cvt><mode><vnconvert>2 (single, operands[1])); + /* Step-2, FP32 => FP16. */ + emit_insn (gen_trunc<vnconvert><vnnconvert>2 (operands[0], single)); + + DONE; + } + [(set_attr "type" "vfncvtitof")] +) + ;; ========================================================================= ;; == Unary arithmetic ;; ========================================================================= diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index b6cd872..c9a7344 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -1247,6 +1247,24 @@ (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096") ]) +(define_mode_iterator VWWCONVERTI [ + (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM2DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + + (V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH") + (V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 64") + (V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 128") + (V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 256") + (V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 512") + (V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024") + (V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048") + (V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096") +]) + (define_mode_iterator VQEXTI [ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") @@ -3243,6 +3261,26 @@ (V512DF "v512si") ]) +;; NN indicates narrow twice +(define_mode_attr VNNCONVERT [ + (RVVM8DI "RVVM2HF") (RVVM4DI "RVVM1HF") (RVVM2DI "RVVMF2HF") + (RVVM1DI "RVVMF4HF") + + (V1DI "V1HF") (V2DI "V2HF") (V4DI "V4HF") (V8DI "V8HF") (V16DI "V16HF") + (V32DI "V32HF") (V64DI "V64HF") (V128DI "V128HF") (V256DI "V256HF") + (V512DI "V512HF") +]) + +;; nn indicates narrow twice +(define_mode_attr vnnconvert [ + (RVVM8DI "rvvm2hf") (RVVM4DI "rvvm1hf") (RVVM2DI "rvvmf2hf") + (RVVM1DI "rvvmf4hf") + + (V1DI "v1hf") (V2DI "v2hf") (V4DI "v4hf") (V8DI "v8hf") (V16DI "v16hf") + (V32DI "v32hf") (V64DI "v64hf") (V128DI "v128hf") (V256DI "v256hf") + (V512DI "v512hf") +]) + (define_mode_attr VDEMOTE [ (RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI") (V1DI "V1SI") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c new file mode 100644 index 0000000..f08c121 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-0.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_int65_to_fp16: +** ... +** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... +*/ +void +test_int65_to_fp16 (int64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c new file mode 100644 index 0000000..2d8ba8f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/cvt-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -ffast-math -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <stdint.h> + +/* +** test_uint65_to_fp16: +** ... +** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma +** vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+ +** vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*ta,\s*ma +** vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+ +** ... +*/ +void +test_uint65_to_fp16 (uint64_t * __restrict a, _Float16 *b, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + b[i] = (_Float16) (a[i]); +} + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c new file mode 100644 index 0000000..5637b05 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/cvt-0.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -ffast-math --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */ + +#include "def.h" + +DEF_CONVERT (fp16, int64_t, _Float16, 1) +DEF_CONVERT (fp16, int64_t, _Float16, 2) +DEF_CONVERT (fp16, int64_t, _Float16, 4) +DEF_CONVERT (fp16, int64_t, _Float16, 8) +DEF_CONVERT (fp16, int64_t, _Float16, 16) +DEF_CONVERT (fp16, int64_t, _Float16, 32) +DEF_CONVERT (fp16, int64_t, _Float16, 64) +DEF_CONVERT (fp16, int64_t, _Float16, 128) +DEF_CONVERT (fp16, int64_t, _Float16, 256) +DEF_CONVERT (fp16, int64_t, _Float16, 512) +DEF_CONVERT (fp16, int64_t, _Float16, 1024) +DEF_CONVERT (fp16, int64_t, _Float16, 2048) + +DEF_CONVERT (fp16, uint64_t, _Float16, 1) +DEF_CONVERT (fp16, uint64_t, _Float16, 2) +DEF_CONVERT (fp16, uint64_t, _Float16, 4) +DEF_CONVERT (fp16, uint64_t, _Float16, 8) +DEF_CONVERT (fp16, uint64_t, _Float16, 16) +DEF_CONVERT (fp16, uint64_t, _Float16, 32) +DEF_CONVERT (fp16, uint64_t, _Float16, 64) +DEF_CONVERT (fp16, uint64_t, _Float16, 128) +DEF_CONVERT (fp16, uint64_t, _Float16, 256) +DEF_CONVERT (fp16, uint64_t, _Float16, 512) +DEF_CONVERT (fp16, uint64_t, _Float16, 1024) +DEF_CONVERT (fp16, uint64_t, _Float16, 2048) + +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.x\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.xu\.w\s+v[0-9]+,\s*v[0-9]+} 15 } } */ +/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 30 } } */ |