aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2021-06-24 15:39:26 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2021-06-24 15:40:28 +0200
commit: 836328b2c99f5b8d45dcca5797f162af322e74da (patch)
tree: 267e416f036229044f1207537b90f8ea4d508d43 /gcc
parent: addd5f0e61f73659c29f47a02e93bfc5e534dbf6 (diff)
download: gcc-836328b2c99f5b8d45dcca5797f162af322e74da.zip
gcc-836328b2c99f5b8d45dcca5797f162af322e74da.tar.gz
gcc-836328b2c99f5b8d45dcca5797f162af322e74da.tar.bz2
i386: Add pack/unpack patterns for 64bit vectors [PR89021]
2021-06-24  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/89021
    * config/i386/i386-expand.c (ix86_expand_sse_unpack): Handle V8QI and V4HI modes.
    * config/i386/mmx.md (sse4_1_<any_extend:code>v4qiv4hi2): New insn pattern.
    (sse4_1_<any_extend:code>v2hiv2si2): Ditto.
    (mmxpackmode): New mode attribute.
    (vec_pack_trunc_<mmxpackmode:mode>): New expander.
    (mmxunpackmode): New mode attribute.
    (vec_unpacks_lo_<mmxunpackmode:mode>): New expander.
    (vec_unpacks_hi_<mmxunpackmode:mode>): Ditto.
    (vec_unpacku_lo_<mmxunpackmode:mode>): Ditto.
    (vec_unpacku_hi_<mmxunpackmode:mode>): Ditto.
    * config/i386/i386.md (extsuffix): Move from ...
    * config/i386/sse.md: ... here.

gcc/testsuite/
    PR target/89021
    * gcc.dg/vect/vect-nb-iter-ub-3.c (dg-additional-options): Add --param vect-epilogues-nomask=0.
    * gcc.target/i386/pr97249-1.c (foo): Add #pragma GCC unroll to avoid loop vectorization.
    (foo1): Ditto.
    (foo2): Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386-expand.c 46
-rw-r--r-- gcc/config/i386/i386.md 3
-rw-r--r-- gcc/config/i386/mmx.md 72
-rw-r--r-- gcc/config/i386/sse.md 3
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr97249-1.c 21
6 files changed, 130 insertions, 17 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 2cb939e..e9763eb 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -5161,6 +5161,18 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
else
unpack = gen_sse4_1_sign_extendv2siv2di2;
break;
+ case E_V8QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4qiv4hi2;
+ else
+ unpack = gen_sse4_1_sign_extendv4qiv4hi2;
+ break;
+ case E_V4HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2hiv2si2;
+ else
+ unpack = gen_sse4_1_sign_extendv2hiv2si2;
+ break;
default:
gcc_unreachable ();
}
@@ -5172,10 +5184,24 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
}
else if (high_p)
{
- /* Shift higher 8 bytes to lower 8 bytes. */
- tmp = gen_reg_rtx (V1TImode);
- emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
- GEN_INT (64)));
+ switch (GET_MODE_SIZE (imode))
+ {
+ case 16:
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
+ GEN_INT (64)));
+ break;
+ case 8:
+ /* Shift higher 4 bytes to lower 4 bytes. */
+ tmp = gen_reg_rtx (V1DImode);
+ emit_insn (gen_mmx_lshrv1di3 (tmp, gen_lowpart (V1DImode, src),
+ GEN_INT (32)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
tmp = gen_lowpart (imode, tmp);
}
else
@@ -5207,6 +5233,18 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
else
unpack = gen_vec_interleave_lowv4si;
break;
+ case E_V8QImode:
+ if (high_p)
+ unpack = gen_mmx_punpckhbw;
+ else
+ unpack = gen_mmx_punpcklbw;
+ break;
+ case E_V4HImode:
+ if (high_p)
+ unpack = gen_mmx_punpckhwd;
+ else
+ unpack = gen_mmx_punpcklwd;
+ break;
default:
gcc_unreachable ();
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9043be3..9b619e2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1000,6 +1000,9 @@
(define_code_attr trunsuffix
[(ss_truncate "s") (truncate "") (us_truncate "us")])
+;; Instruction suffix for SSE sign and zero extensions.
+(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
+
;; Used in signed and unsigned fix.
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7a827dc..e887f034 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2639,6 +2639,78 @@
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
+(define_insn "sse4_1_<code>v4qiv4hi2"
+ [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
+ (any_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 1 "register_operand" "Yr,*x,Yw")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v2hiv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "register_operand" "Yr,*x,v")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
+;; Pack/unpack vector modes
+(define_mode_attr mmxpackmode
+ [(V4HI "V8QI") (V2SI "V4HI")])
+
+(define_expand "vec_pack_trunc_<mode>"
+ [(match_operand:<mmxpackmode> 0 "register_operand")
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:MMXMODE24 2 "register_operand")]
+ "TARGET_MMX_WITH_SSE"
+{
+ rtx op1 = gen_lowpart (<mmxpackmode>mode, operands[1]);
+ rtx op2 = gen_lowpart (<mmxpackmode>mode, operands[2]);
+ ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+ DONE;
+})
+
+(define_mode_attr mmxunpackmode
+ [(V8QI "V4HI") (V4HI "V2SI")])
+
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<mmxunpackmode> 0 "register_operand")
+ (match_operand:MMXMODE12 1 "register_operand")]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
+
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<mmxunpackmode> 0 "register_operand")
+ (match_operand:MMXMODE12 1 "register_operand")]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
+
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<mmxunpackmode> 0 "register_operand")
+ (match_operand:MMXMODE12 1 "register_operand")]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
+
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<mmxunpackmode> 0 "register_operand")
+ (match_operand:MMXMODE12 1 "register_operand")]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
+
(define_insn "*mmx_pinsrd"
[(set (match_operand:V2SI 0 "register_operand" "=x,Yv")
(vec_merge:V2SI
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2d29877..e4f01e6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -976,9 +976,6 @@
[(V8SI "si") (V8SF "ps") (V4DF "pd")
(V16SI "si") (V16SF "ps") (V8DF "pd")])
-;; Instruction suffix for sign and zero extensions.
-(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
-
;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; i64x4 or f64x4 for 512bit modes.
(define_mode_attr i128
diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
index dbf5091..1666526 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c
@@ -1,4 +1,4 @@
-/* { dg-additional-options "-fdump-tree-cunroll-details" } */
+/* { dg-additional-options "-fdump-tree-cunroll-details --param vect-epilogues-nomask=0" } */
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
index 4478a34..e7d1d74 100644
--- a/gcc/testsuite/gcc.target/i386/pr97249-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -8,23 +8,26 @@
void
foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
{
- for (int i = 0 ; i != 8; i++)
- p3[i] = p1[i] + p2[i];
- return;
+ /* Avoid loop vectorization. */
+#pragma GCC unroll 8
+ for (int i = 0 ; i != 8; i++)
+ p3[i] = p1[i] + p2[i];
}
void
foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
{
- for (int i = 0 ; i != 4; i++)
- p3[i] = p1[i] + p2[i];
- return;
+ /* Avoid loop vectorization. */
+#pragma GCC unroll 4
+ for (int i = 0 ; i != 4; i++)
+ p3[i] = p1[i] + p2[i];
}
void
foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
{
- for (int i = 0 ; i != 2; i++)
- p3[i] = (long long)p1[i] + (long long)p2[i];
- return;
+ /* Avoid loop vectorization. */
+#pragma GCC unroll 2
+ for (int i = 0 ; i != 2; i++)
+ p3[i] = (long long)p1[i] + (long long)p2[i];
}