diff options
author | konglin1 <lingling.kong@intel.com> | 2021-10-27 17:15:05 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-11-10 15:10:23 +0800 |
commit | f2572a398d21fd52435c94065c0651fd79db847c (patch) | |
tree | a263f0e469d85acd0a685ac3fd9e32832beb00ff /gcc | |
parent | 9299f69027e8e00f5a9debe765a3280ebcba5cd1 (diff) | |
download | gcc-f2572a398d21fd52435c94065c0651fd79db847c.zip gcc-f2572a398d21fd52435c94065c0651fd79db847c.tar.gz gcc-f2572a398d21fd52435c94065c0651fd79db847c.tar.bz2 |
i386: Support complex fma/conj_fma for _Float16.
Support cmla_optab, cmul_optab, cmla_conj_optab, cmul_conj_optab for vector _Float16.
gcc/ChangeLog:
* config/i386/sse.md (cmul<conj_op><mode>3): Add new define_expand.
(cmla<conj_op><mode>4): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-vector-complex-float.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 23 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c | 40 |
2 files changed, 63 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a58d8e8..1908412 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5938,6 +5938,12 @@ [(UNSPEC_COMPLEX_FMA_PAIR "fmaddc") (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")]) +(define_int_attr conj_op + [(UNSPEC_COMPLEX_FMA "") + (UNSPEC_COMPLEX_FCMA "_conj") + (UNSPEC_COMPLEX_FMUL "") + (UNSPEC_COMPLEX_FCMUL "_conj")]) + (define_mode_attr complexmove [(V32HF "avx512f_loadv16sf") (V16HF "avx512vl_loadv8sf") @@ -6019,6 +6025,15 @@ DONE; }) +(define_expand "cmla<conj_op><mode>4" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") + (unspec:VF_AVX512FP16VL + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") + (match_operand:VF_AVX512FP16VL 2 "vector_operand") + (match_operand:VF_AVX512FP16VL 3 "vector_operand")] + UNSPEC_COMPLEX_F_C_MA))] + "TARGET_AVX512FP16") + (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>" [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") (unspec:VF_AVX512FP16VL @@ -6153,6 +6168,14 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_expand "cmul<conj_op><mode>3" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") + (unspec:VF_AVX512FP16VL + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") + (match_operand:VF_AVX512FP16VL 2 "vector_operand")] + UNSPEC_COMPLEX_F_C_MUL))] + "TARGET_AVX512FP16") + (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>" [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") (unspec:VF_AVX512FP16VL diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c new file mode 100644 index 0000000..7c579cb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times "vfmaddcph\[ 
\\t\]" 1 } } */ +/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */ +/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */ +/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */ +/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */ + +#include<complex.h> +#define TYPE _Float16 +#define N 16 + +void fma0 (_Complex TYPE *a, _Complex TYPE *b, + _Complex TYPE * __restrict c) +{ + for (int i = 0; i < N; i++) + c[i] += a[i] * b[i]; +} + +void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N], + _Complex TYPE c[restrict N]) +{ + for (int i = 0; i < N; i++) + c[i] += a[i] * ~b[i]; +} + +void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N], + _Complex TYPE c[restrict N]) +{ + for (int i = 0; i < N; i++) + c[i] = a[i] * b[i]; +} + +void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N], + _Complex TYPE c[restrict N]) +{ + for (int i = 0; i < N; i++) + c[i] = a[i] * ~b[i]; +} |