diff options
author | liuhongt <hongtao.liu@intel.com> | 2020-05-20 15:53:14 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2020-05-22 21:30:23 +0800 |
commit | e740f3d73144abbca1ad98a04825c6bd63314a0b (patch) | |
tree | 602f5aa0812e522d3d4ebc25eaa3504ad0890dc0 /gcc | |
parent | 808b611bfb4b05703ea174e50874c711dca44c98 (diff) | |
download | gcc-e740f3d73144abbca1ad98a04825c6bd63314a0b.zip gcc-e740f3d73144abbca1ad98a04825c6bd63314a0b.tar.gz gcc-e740f3d73144abbca1ad98a04825c6bd63314a0b.tar.bz2 |
Add missing vector truncmn2 expanders [PR92658]
2020-05-22 Hongtao.liu <hongtao.liu@intel.com>
gcc/ChangeLog:
PR target/92658
* config/i386/sse.md (trunc<pmov_src_lower><mode>2): New expander.
(truncv32hiv32qi2): Ditto.
(trunc<ssedoublemodelower><mode>2): Ditto.
(trunc<mode><pmov_dst_3>2): Ditto.
(trunc<mode><pmov_dst_4>2): Ditto.
(truncv2div2si2): Ditto.
(truncv8div8qi2): Ditto.
(avx512f_<code>v8div16qi2): Renaming from *avx512f_<code>v8div16qi2.
(avx512vl_<code>v2div2si2): Renaming from *avx512vl_<code>v2div2si2.
(avx512vl_<code><mode>v<ssescalarnum>qi2): Renaming
from *avx512vl_<code><mode>v<ssescalarnum>qi2.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr92658-avx512f.c: New test.
* gcc.target/i386/pr92658-avx512vl.c: Ditto.
* gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 77 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c | 91 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-avx512f.c | 106 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c | 129 |
6 files changed, 420 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9106cdb2..4049ac3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2020-05-22 Hongtao.liu <hongtao.liu@intel.com> + + PR target/92658 + * config/i386/sse.md (trunc<pmov_src_lower><mode>2): New expander + (truncv32hiv32qi2): Ditto. + (trunc<ssedoublemodelower><mode>2): Ditto. + (trunc<mode><pmov_dst_3>2): Ditto. + (trunc<mode><pmov_dst_mode_4>2): Ditto. + (truncv2div2si2): Ditto. + (truncv8div8qi2): Ditto. + (avx512f_<code>v8div16qi2): Renaming from *avx512f_<code>v8div16qi2. + (avx512vl_<code>v2div2si): Renaming from *avx512vl_<code>v2div2si2. + (avx512vl_<code><mode>v2<ssecakarnum>qi2): Renaming from + *avx512vl_<code><mode>v<ssescalarnum>qi2. + 2020-05-22 H.J. Lu <hongjiu.lu@intel.com> PR target/95258 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5071fb2..bb8ee19 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10513,6 +10513,12 @@ (define_mode_attr pmov_suff_1 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")]) +(define_expand "trunc<pmov_src_lower><mode>2" + [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand") + (truncate:PMOV_DST_MODE_1 + (match_operand:<pmov_src_mode> 1 "register_operand")))] + "TARGET_AVX512F") + (define_insn "*avx512f_<code><pmov_src_lower><mode>2" [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_1 @@ -10547,6 +10553,12 @@ (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512F") +(define_expand "truncv32hiv32qi2" + [(set (match_operand:V32QI 0 "nonimmediate_operand") + (truncate:V32QI + (match_operand:V32HI 1 "register_operand")))] + "TARGET_AVX512BW") + (define_insn "avx512bw_<code>v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI @@ -10586,6 +10598,12 @@ (define_mode_attr pmov_suff_2 [(V16QI "wb") (V8HI "dw") (V4SI "qd")]) +(define_expand "trunc<ssedoublemodelower><mode>2" + [(set (match_operand:PMOV_DST_MODE_2 0 
"nonimmediate_operand") + (truncate:PMOV_DST_MODE_2 + (match_operand:<ssedoublemode> 1 "register_operand")))] + "TARGET_AVX512VL") + (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2" [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_2 @@ -10628,7 +10646,20 @@ (define_mode_attr pmov_suff_3 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")]) -(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2" +(define_expand "trunc<mode><pmov_dst_3>2" + [(set (match_operand:<pmov_dst_3> 0 "register_operand") + (truncate:<pmov_dst_3> + (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0); + emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0], + operands[1], + CONST0_RTX (<pmov_dst_zeroed_3>mode))); + DONE; +}) + +(define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (any_truncate:<pmov_dst_3> @@ -10920,7 +10951,21 @@ (define_mode_attr pmov_suff_4 [(V4DI "qw") (V2DI "qw") (V4SI "dw")]) -(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2" +(define_expand "trunc<mode><pmov_dst_4>2" + [(set (match_operand:<pmov_dst_4> 0 "register_operand") + (truncate:<pmov_dst_4> + (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0); + emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0], + operands[1], + CONST0_RTX (<pmov_dst_zeroed_4>mode))); + DONE; + +}) + +(define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_concat:V8HI (any_truncate:<pmov_dst_4> @@ -11085,7 +11130,20 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_<code>v2div2si2" +(define_expand "truncv2div2si2" + [(set (match_operand:V2SI 0 "register_operand") + 
(truncate:V2SI + (match_operand:V2DI 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0); + emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0], + operands[1], + CONST0_RTX (V2SImode))); + DONE; +}) + +(define_insn "avx512vl_<code>v2div2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (any_truncate:V2SI @@ -11164,7 +11222,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512f_<code>v8div16qi2" +(define_expand "truncv8div8qi2" + [(set (match_operand:V8QI 0 "register_operand") + (truncate:V8QI + (match_operand:V8DI 1 "register_operand")))] + "TARGET_AVX512F" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0); + emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "avx512f_<code>v8div16qi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (any_truncate:V8QI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5a62826..7812e3f0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-05-22 Hongtao.liu <hongtao.liu@intel.com> + + * gcc.target/i386/pr92658-avx512f.c: New test. + * gcc.target/i386/pr92658-avx512vl.c: Ditto. + * gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto. 
+ 2020-05-22 Richard Biener <rguenther@suse.de> PR tree-optimization/95268 diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c new file mode 100644 index 0000000..bdfad7a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c @@ -0,0 +1,91 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned char v32qi __attribute__((vector_size (32))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned short v32hi __attribute__((vector_size (64))); + + +void +truncwb_512 (v32qi * dst, v32hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + tem[16] = (*src)[16]; + tem[17] = (*src)[17]; + tem[18] = (*src)[18]; + tem[19] = (*src)[19]; + tem[20] = (*src)[20]; + tem[21] = (*src)[21]; + tem[22] = (*src)[22]; + tem[23] = (*src)[23]; + tem[24] = (*src)[24]; + tem[25] = (*src)[25]; + tem[26] = (*src)[26]; + tem[27] = (*src)[27]; + tem[28] = (*src)[28]; + tem[29] = (*src)[29]; + tem[30] = (*src)[30]; + tem[31] = (*src)[31]; + dst[0] = *(v32qi *) tem; +} + +void +truncwb_256 (v16qi * dst, v16hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = 
(*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +void +truncwb_128 (v16qi * dst, v8hi * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovwb" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 3 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c new file mode 100644 index 0000000..2ba2907 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c @@ -0,0 +1,106 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned int v8si __attribute__((vector_size (32))); +typedef unsigned int v16si __attribute__((vector_size (64))); +typedef unsigned long long v8di __attribute__((vector_size (64))); + +void +truncqd (v8si * dst, v8di * __restrict src) +{ + unsigned tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8si *) tem; +} + +void +truncqw (v8hi * dst, v8di * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb (v8qi * dst, 
v8di * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8qi *) tem; +} + +void +truncdw (v16hi * dst, v16si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16hi *) tem; +} + + +void +truncdb (v16qi * dst, v16si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c new file mode 100644 index 0000000..50b32f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c @@ -0,0 +1,129 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mavx512vl" } */ + +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned int v4si __attribute__((vector_size (16))); +typedef unsigned int 
v8si __attribute__((vector_size (32))); +typedef unsigned long long v2di __attribute__((vector_size (16))); +typedef unsigned long long v4di __attribute__((vector_size (32))); + +void +truncqd_256 (v4si * dst, v4di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_256 (v8hi * dst, v4di * __restrict src) +{ + unsigned short tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_256 (v16qi * dst, v4di * __restrict src) +{ + unsigned char tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +void +truncqd_128 (v4si * dst, v2di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_128 (v8hi * dst, v2di * __restrict src) +{ + unsigned short tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_128 (v16qi * dst, v2di * __restrict src) +{ + unsigned char tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_256 (v8hi * dst, v8si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncdb_256 (v16qi * dst, v8si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_128 (v8hi * dst, v4si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; 
+ dst[0] = *(v8hi *) tem; +} + +void +truncdb_128 (v16qi * dst, v4si * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 2 } } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 2 { xfail *-*-* } } } */ |