diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2023-05-10 22:40:53 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2023-05-10 22:42:41 +0200 |
commit | 608e7f3ab47fe746279c552c3574147aa3d8ee76 (patch) | |
tree | 07fada17524dde9b3e76fb37dd2f016b2863177b | |
parent | bdc10c2bfaceb3be567e0a27d8951a22b4be2ed4 (diff) | |
download | gcc-608e7f3ab47fe746279c552c3574147aa3d8ee76.zip gcc-608e7f3ab47fe746279c552c3574147aa3d8ee76.tar.gz gcc-608e7f3ab47fe746279c552c3574147aa3d8ee76.tar.bz2 |
i386: Add missing vector extend patterns [PR92658]
Add missing insn pattern for v2qi -> v2si vector extend and named
expanders to activate generation of vector extends to 8-byte and 4-byte
vectors.
gcc/ChangeLog:
PR target/92658
* config/i386/mmx.md (sse4_1_<code>v2qiv2si2): New insn pattern.
(<insn>v4qiv4hi2): New expander.
(<insn>v2hiv2si2): Ditto.
(<insn>v2qiv2si2): Ditto.
(<insn>v2qiv2hi2): Ditto.
gcc/testsuite/ChangeLog:
PR target/92658
* gcc.target/i386/pr92658-sse4-4b.c: New test.
* gcc.target/i386/pr92658-sse4-8b.c: New test.
-rw-r--r-- | gcc/config/i386/mmx.md | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c | 71 |
3 files changed, 159 insertions, 0 deletions
```diff
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 6dd203f..e7ca921 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3543,6 +3543,18 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_expand "<insn>v4qiv4hi2"
+  [(set (match_operand:V4HI 0 "register_operand")
+        (any_extend:V4HI
+          (match_operand:V4QI 1 "register_operand")))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+{
+  rtx op1 = force_reg (V4QImode, operands[1]);
+  op1 = lowpart_subreg (V8QImode, op1, V4QImode);
+  emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1));
+  DONE;
+})
+
 (define_insn "sse4_1_<code>v2hiv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
         (any_extend:V2SI
@@ -3557,6 +3569,44 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_expand "<insn>v2hiv2si2"
+  [(set (match_operand:V2SI 0 "register_operand")
+        (any_extend:V2SI
+          (match_operand:V2HI 1 "register_operand")))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+{
+  rtx op1 = force_reg (V2HImode, operands[1]);
+  op1 = lowpart_subreg (V4HImode, op1, V2HImode);
+  emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1));
+  DONE;
+})
+
+(define_insn "sse4_1_<code>v2qiv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+        (any_extend:V2SI
+          (vec_select:V2QI
+            (match_operand:V4QI 1 "register_operand" "Yr,*x,v")
+            (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+  "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
+(define_expand "<insn>v2qiv2si2"
+  [(set (match_operand:V2SI 0 "register_operand")
+        (any_extend:V2SI
+          (match_operand:V2QI 1 "register_operand")))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+{
+  rtx op1 = force_reg (V2QImode, operands[1]);
+  op1 = lowpart_subreg (V4QImode, op1, V2QImode);
+  emit_insn (gen_sse4_1_<code>v2qiv2si2 (operands[0], op1));
+  DONE;
+})
+
 (define_insn "sse4_1_<code>v2qiv2hi2"
   [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
         (any_extend:V2HI
@@ -3571,6 +3621,18 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_expand "<insn>v2qiv2hi2"
+  [(set (match_operand:V2HI 0 "register_operand")
+        (any_extend:V2HI
+          (match_operand:V2QI 1 "register_operand")))]
+  "TARGET_SSE4_1"
+{
+  rtx op1 = force_reg (V2QImode, operands[1]);
+  op1 = lowpart_subreg (V4QImode, op1, V2QImode);
+  emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1));
+  DONE;
+})
+
 ;; Pack/unpack vector modes
 (define_mode_attr mmxpackmode
   [(V4HI "V8QI") (V2SI "V4HI")])
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
new file mode 100644
index 0000000..f0264a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
@@ -0,0 +1,26 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
+
+typedef unsigned char v4qi __attribute__((vector_size (4)));
+typedef unsigned short v2hi __attribute__((vector_size (4)));
+
+void
+foo_u8_u16 (v2hi * dst, v4qi * __restrict src)
+{
+  unsigned short tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2hi *) tem;
+}
+
+void
+bar_u8_u16 (v2hi * dst, v4qi src)
+{
+  unsigned short tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
new file mode 100644
index 0000000..5c815f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
@@ -0,0 +1,71 @@
+/* PR target/92658 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8)));
+typedef unsigned short v4hi __attribute__((vector_size (8)));
+typedef unsigned int v2si __attribute__((vector_size (8)));
+
+void
+foo_u8_u16 (v4hi * dst, v8qi * __restrict src)
+{
+  unsigned short tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4hi *) tem;
+}
+
+void
+bar_u8_u16 (v4hi * dst, v8qi src)
+{
+  unsigned short tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
+
+void
+foo_u8_u32 (v2si * dst, v8qi * __restrict src)
+{
+  unsigned int tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2si *) tem;
+}
+
+void
+bar_u8_u32 (v2si * dst, v8qi src)
+{
+  unsigned int tem[2];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbd" 2 } } */
+
+void
+foo_u16_u32 (v2si * dst, v4hi * __restrict src)
+{
+  unsigned int tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2si *) tem;
+}
+
+void
+bar_u16_u32 (v2si * dst, v4hi src)
+{
+  unsigned int tem[2];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
```