diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-06-24 15:39:26 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-06-24 15:40:28 +0200 |
commit | 836328b2c99f5b8d45dcca5797f162af322e74da (patch) | |
tree | 267e416f036229044f1207537b90f8ea4d508d43 /gcc | |
parent | addd5f0e61f73659c29f47a02e93bfc5e534dbf6 (diff) | |
download | gcc-836328b2c99f5b8d45dcca5797f162af322e74da.zip gcc-836328b2c99f5b8d45dcca5797f162af322e74da.tar.gz gcc-836328b2c99f5b8d45dcca5797f162af322e74da.tar.bz2 |
i386: Add pack/unpack patterns for 64bit vectors [PR89021]
2021-06-24 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/89021
* config/i386/i386-expand.c (ix86_expand_sse_unpack):
Handle V8QI and V4HI modes.
* config/i386/mmx.md (sse4_1_<any_extend:code>v4qiv4hi2):
New insn pattern.
(sse4_1_<any_extend:code>v2hiv2si2): Ditto.
(mmxpackmode): New mode attribute.
(vec_pack_trunc_<mmxpackmode:mode>): New expander.
(mmxunpackmode): New mode attribute.
(vec_unpacks_lo_<mmxunpackmode:mode>): New expander.
(vec_unpacks_hi_<mmxunpackmode:mode>): Ditto.
(vec_unpacku_lo_<mmxunpackmode:mode>): Ditto.
(vec_unpacku_hi_<mmxunpackmode:mode>): Ditto.
* config/i386/i386.md (extsuffix): Move from ...
* config/i386/sse.md: ... here.
gcc/testsuite/
PR target/89021
* gcc.dg/vect/vect-nb-iter-ub-3.c (dg-additional-options):
Add --param vect-epilogues-nomask=0.
* gcc.target/i386/pr97249-1.c (foo): Add #pragma GCC unroll
to avoid loop vectorization.
(foo1): Ditto.
(foo2): Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 46 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 3 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 72 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr97249-1.c | 21 |
6 files changed, 130 insertions, 17 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2cb939e..e9763eb 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -5161,6 +5161,18 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) else unpack = gen_sse4_1_sign_extendv2siv2di2; break; + case E_V8QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4qiv4hi2; + else + unpack = gen_sse4_1_sign_extendv4qiv4hi2; + break; + case E_V4HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2hiv2si2; + else + unpack = gen_sse4_1_sign_extendv2hiv2si2; + break; default: gcc_unreachable (); } @@ -5172,10 +5184,24 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) } else if (high_p) { - /* Shift higher 8 bytes to lower 8 bytes. */ - tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), - GEN_INT (64))); + switch (GET_MODE_SIZE (imode)) + { + case 16: + /* Shift higher 8 bytes to lower 8 bytes. */ + tmp = gen_reg_rtx (V1TImode); + emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), + GEN_INT (64))); + break; + case 8: + /* Shift higher 4 bytes to lower 4 bytes. 
*/ + tmp = gen_reg_rtx (V1DImode); + emit_insn (gen_mmx_lshrv1di3 (tmp, gen_lowpart (V1DImode, src), + GEN_INT (32))); + break; + default: + gcc_unreachable (); + } + tmp = gen_lowpart (imode, tmp); } else @@ -5207,6 +5233,18 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) else unpack = gen_vec_interleave_lowv4si; break; + case E_V8QImode: + if (high_p) + unpack = gen_mmx_punpckhbw; + else + unpack = gen_mmx_punpcklbw; + break; + case E_V4HImode: + if (high_p) + unpack = gen_mmx_punpckhwd; + else + unpack = gen_mmx_punpcklwd; + break; default: gcc_unreachable (); } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9043be3..9b619e2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1000,6 +1000,9 @@ (define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate "us")]) +;; Instruction suffix for SSE sign and zero extensions. +(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) + ;; Used in signed and unsigned fix. 
(define_code_iterator any_fix [fix unsigned_fix]) (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")]) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 7a827dc..e887f034 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2639,6 +2639,78 @@ (set_attr "type" "mmxcvt,sselog,sselog") (set_attr "mode" "DI,TI,TI")]) +(define_insn "sse4_1_<code>v4qiv4hi2" + [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw") + (any_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 1 "register_operand" "Yr,*x,Yw") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) + +(define_insn "sse4_1_<code>v2hiv2si2" + [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v") + (any_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "Yr,*x,v") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "TI")]) + +;; Pack/unpack vector modes +(define_mode_attr mmxpackmode + [(V4HI "V8QI") (V2SI "V4HI")]) + +(define_expand "vec_pack_trunc_<mode>" + [(match_operand:<mmxpackmode> 0 "register_operand") + (match_operand:MMXMODE24 1 "register_operand") + (match_operand:MMXMODE24 2 "register_operand")] + "TARGET_MMX_WITH_SSE" +{ + rtx op1 = gen_lowpart (<mmxpackmode>mode, operands[1]); + rtx op2 = gen_lowpart (<mmxpackmode>mode, operands[2]); + ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); + DONE; +}) + +(define_mode_attr mmxunpackmode + [(V8QI "V4HI") (V4HI "V2SI")]) + +(define_expand "vec_unpacks_lo_<mode>" + 
[(match_operand:<mmxunpackmode> 0 "register_operand") + (match_operand:MMXMODE12 1 "register_operand")] + "TARGET_MMX_WITH_SSE" + "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") + +(define_expand "vec_unpacks_hi_<mode>" + [(match_operand:<mmxunpackmode> 0 "register_operand") + (match_operand:MMXMODE12 1 "register_operand")] + "TARGET_MMX_WITH_SSE" + "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") + +(define_expand "vec_unpacku_lo_<mode>" + [(match_operand:<mmxunpackmode> 0 "register_operand") + (match_operand:MMXMODE12 1 "register_operand")] + "TARGET_MMX_WITH_SSE" + "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") + +(define_expand "vec_unpacku_hi_<mode>" + [(match_operand:<mmxunpackmode> 0 "register_operand") + (match_operand:MMXMODE12 1 "register_operand")] + "TARGET_MMX_WITH_SSE" + "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") + (define_insn "*mmx_pinsrd" [(set (match_operand:V2SI 0 "register_operand" "=x,Yv") (vec_merge:V2SI diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 2d29877..e4f01e6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -976,9 +976,6 @@ [(V8SI "si") (V8SF "ps") (V4DF "pd") (V16SI "si") (V16SF "ps") (V8DF "pd")]) -;; Instruction suffix for sign and zero extensions. -(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) - ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise. ;; i64x4 or f64x4 for 512bit modes. 
(define_mode_attr i128 diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c index dbf5091..1666526 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fdump-tree-cunroll-details" } */ +/* { dg-additional-options "-fdump-tree-cunroll-details --param vect-epilogues-nomask=0" } */ #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c index 4478a34..e7d1d74 100644 --- a/gcc/testsuite/gcc.target/i386/pr97249-1.c +++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c @@ -8,23 +8,26 @@ void foo (unsigned char* p1, unsigned char* p2, short* __restrict p3) { - for (int i = 0 ; i != 8; i++) - p3[i] = p1[i] + p2[i]; - return; + /* Avoid loop vectorization. */ +#pragma GCC unroll 8 + for (int i = 0 ; i != 8; i++) + p3[i] = p1[i] + p2[i]; } void foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3) { - for (int i = 0 ; i != 4; i++) - p3[i] = p1[i] + p2[i]; - return; + /* Avoid loop vectorization. */ +#pragma GCC unroll 4 + for (int i = 0 ; i != 4; i++) + p3[i] = p1[i] + p2[i]; } void foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3) { - for (int i = 0 ; i != 2; i++) - p3[i] = (long long)p1[i] + (long long)p2[i]; - return; + /* Avoid loop vectorization. */ +#pragma GCC unroll 2 + for (int i = 0 ; i != 2; i++) + p3[i] = (long long)p1[i] + (long long)p2[i]; } |