aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2022-01-02 21:12:10 +0100
committer: Uros Bizjak <ubizjak@gmail.com> 2022-01-02 21:13:14 +0100
commit: 9ff206d3865df5cb8407490aa9481029beac087f (patch)
tree: fb8a6fee872e0a4ddc63c25ab4dd90ee7983428e /gcc/testsuite
parent: 6bec6e3aaa306ca7b87d6e6654acca546fa25e90 (diff)
downloadgcc-9ff206d3865df5cb8407490aa9481029beac087f.zip
gcc-9ff206d3865df5cb8407490aa9481029beac087f.tar.gz
gcc-9ff206d3865df5cb8407490aa9481029beac087f.tar.bz2
i386: Introduce V2QImode vectorized arithmetic [PR103861]
This patch adds basic V2QImode infrastructure and V2QImode arithmetic
operations (plus, minus and neg). The patched compiler can emit SSE
vectorized QImode operations (e.g. PADDB) with partial QImode vectors,
and also synthesized double HI/LO QImode operations with integer registers.

The testcase:

    typedef char __v2qi __attribute__ ((__vector_size__ (2)));
    __v2qi plus (__v2qi a, __v2qi b) { return a + b; };

compiles with -O2 to:

    movl    %edi, %edx
    movl    %esi, %eax
    addb    %sil, %dl
    addb    %ah, %dh
    movl    %edx, %eax
    ret

which is much better than what the unpatched compiler produces:

    movl    %edi, %eax
    movl    %esi, %edx
    xorl    %ecx, %ecx
    movb    %dil, %cl
    movsbl  %dh, %edx
    movsbl  %ah, %eax
    addl    %edx, %eax
    addb    %sil, %cl
    movb    %al, %ch
    movl    %ecx, %eax
    ret

The V2QImode vectorization does not require vector registers, so it can be
enabled by default also for 32-bit targets without SSE.

The patch also enables vectorized V2QImode sign/zero extends.

2021-12-30  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/103861
    * config/i386/i386.h (VALID_SSE2_REG_MODE): Add V2QImode.
    (VALID_INT_MODE_P): Ditto.
    * config/i386/i386.c (ix86_secondary_reload): Handle
    V2QImode reloads from SSE register to memory.
    (vector_mode_supported_p): Always return true for V2QImode.
    * config/i386/i386.md (*subqi_ext<mode>_2): New insn pattern.
    (*negqi_ext<mode>_2): Ditto.
    * config/i386/mmx.md (movv2qi): New expander.
    (movmisalignv2qi): Ditto.
    (*movv2qi_internal): New insn pattern.
    (*pushv2qi2): Ditto.
    (negv2qi2 and splitters): Ditto.
    (<plusminus:insn>v2qi3 and splitters): Ditto.

gcc/testsuite/ChangeLog:

    PR target/103861
    * gcc.dg/store_merging_18.c (dg-options): Add -fno-tree-vectorize.
    * gcc.dg/store_merging_29.c (dg-options): Ditto.
    * gcc.target/i386/pr103861.c: New test.
    * gcc.target/i386/pr92658-avx512vl.c (dg-final): Remove
    vpmovqb scan-assembler xfail.
    * gcc.target/i386/pr92658-sse4.c (dg-final): Remove
    pmovzxbq scan-assembler xfail.
    * gcc.target/i386/pr92658-sse4-2.c (dg-final): Remove
    pmovsxbq scan-assembler xfail.
    * gcc.target/i386/warn-vect-op-2.c (dg-warning): Adjust warnings.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- gcc/testsuite/gcc.dg/store_merging_18.c | 2
-rw-r--r-- gcc/testsuite/gcc.dg/store_merging_29.c | 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr103861.c | 23
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c | 4
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c | 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr92658-sse4.c | 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/warn-vect-op-2.c | 4
7 files changed, 31 insertions, 8 deletions
diff --git a/gcc/testsuite/gcc.dg/store_merging_18.c b/gcc/testsuite/gcc.dg/store_merging_18.c
index 66e157e..fdff6b4 100644
--- a/gcc/testsuite/gcc.dg/store_merging_18.c
+++ b/gcc/testsuite/gcc.dg/store_merging_18.c
@@ -1,6 +1,6 @@
/* PR tree-optimization/83843 */
/* { dg-do run } */
-/* { dg-options "-O2 -fdump-tree-store-merging" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fdump-tree-store-merging" } */
/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target { store_merge && { ! arm*-*-* } } } } } */
__attribute__((noipa)) void
diff --git a/gcc/testsuite/gcc.dg/store_merging_29.c b/gcc/testsuite/gcc.dg/store_merging_29.c
index 6b32aa9..e7afc9d 100644
--- a/gcc/testsuite/gcc.dg/store_merging_29.c
+++ b/gcc/testsuite/gcc.dg/store_merging_29.c
@@ -1,7 +1,7 @@
/* PR tree-optimization/88709 */
/* { dg-do run { target int32 } } */
/* { dg-require-effective-target store_merge } */
-/* { dg-options "-O2 -fdump-tree-store-merging-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fdump-tree-store-merging-details" } */
/* { dg-final { scan-tree-dump "New sequence of 3 stores to replace old one of 6 stores" "store-merging" { target { le && { ! arm*-*-* } } } } } */
/* { dg-final { scan-tree-dump "New sequence of \[34] stores to replace old one of 6 stores" "store-merging" { target { be && { ! arm*-*-* } } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103861.c b/gcc/testsuite/gcc.target/i386/pr103861.c
new file mode 100644
index 0000000..1587176
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103861.c
@@ -0,0 +1,23 @@
+/* PR target/103861 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -dp" } */
+
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+
+__v2qi and (__v2qi a, __v2qi b) { return a & b; };
+
+__v2qi andn (__v2qi a, __v2qi b) { return a & ~b; };
+
+__v2qi or (__v2qi a, __v2qi b) { return a | b; };
+
+__v2qi xor (__v2qi a, __v2qi b) { return a ^ b; };
+
+__v2qi not (__v2qi a) { return ~a; };
+
+__v2qi plus (__v2qi a, __v2qi b) { return a + b; };
+
+__v2qi minus (__v2qi a, __v2qi b) { return a - b; };
+
+__v2qi neg (__v2qi a) { return -a; };
+
+/* { dg-final { scan-assembler-not "insvhi" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c
index ae6959e..d712922 100644
--- a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c
@@ -123,7 +123,7 @@ truncdb_128 (v16qi * dst, v4si * __restrict src)
/* { dg-final { scan-assembler-times "vpmovqd" 2 } } */
/* { dg-final { scan-assembler-times "vpmovqw" 2 } } */
-/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%ymm" 1 } } */
-/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%xmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%ymm" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vpmovdw" 2 } } */
/* { dg-final { scan-assembler-times "vpmovdb" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c
index a1cf9e7..4a76a7d 100644
--- a/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c
@@ -81,7 +81,7 @@ bar_s8_s64 (v2di * dst, v16qi src)
dst[0] = *(v2di *) tem;
}
-/* { dg-final { scan-assembler-times "pmovsxbq" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "pmovsxbq" 2 } } */
void
foo_s16_s32 (v4si * dst, v8hi * __restrict src)
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4.c
index 9fd2eee..4f655a3 100644
--- a/gcc/testsuite/gcc.target/i386/pr92658-sse4.c
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4.c
@@ -81,7 +81,7 @@ bar_u8_u64 (v2di * dst, v16qi src)
dst[0] = *(v2di *) tem;
}
-/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "pmovzxbq" 2 } } */
void
foo_u16_u32 (v4si * dst, v8hi * __restrict src)
diff --git a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
index 15eb961..5e378b6 100644
--- a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
+++ b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
@@ -11,8 +11,8 @@ int main (int argc, char *argv[])
argc, 1, 15, 38, 12, -1, argc, 2};
vector (16, signed char) res[] =
{
- v0 + v1, /* { dg-warning "expanded in parallel" } */
- v0 - v1, /* { dg-warning "expanded in parallel" } */
+ v0 + v1, /* { dg-warning "expanded piecewise" } */
+ v0 - v1, /* { dg-warning "expanded piecewise" } */
v0 > v1, /* { dg-warning "expanded piecewise" } */
v0 & v1, /* { dg-warning "expanded in parallel" } */
__builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */