aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@eyesopen.com>2006-04-16 21:46:59 +0000
committerRoger Sayle <sayle@gcc.gnu.org>2006-04-16 21:46:59 +0000
commit2ff619482da2a22a0099aacfd9e3118e25256aac (patch)
treeb6b0e1ede375a1e186941f57d26fa43ea68e93cf
parent3c86fb4e17941da9eb4026bda6301bf0a74a96fe (diff)
downloadgcc-2ff619482da2a22a0099aacfd9e3118e25256aac.zip
gcc-2ff619482da2a22a0099aacfd9e3118e25256aac.tar.gz
gcc-2ff619482da2a22a0099aacfd9e3118e25256aac.tar.bz2
re PR target/24076 ((vector char){x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x} code gen is not that good)
2006-04-15 Roger Sayle <roger@eyesopen.com> Andrew Pinski <pinskia@gcc.gnu.org> Dale Johannesen <dalej@apple.com> PR target/24076 * config/i386/i386.c (ix86_expand_vector_init_duplicate): Add special case code to implement V8HImode and V16QImode with SSE2. * gcc.target/i386/vecinit-3.c: New testcase. * gcc.target/i386/vecinit-4.c: Likewise. * gcc.target/i386/sse-18.c: Likewise. * gcc.target/i386/sse-19.c: Likewise. Co-Authored-By: Andrew Pinski <pinskia@gcc.gnu.org> Co-Authored-By: Dale Johannesen <dalej@apple.com> From-SVN: r112990
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/i386/i386.c55
-rw-r--r--gcc/testsuite/ChangeLog10
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-18.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-19.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/vecinit-3.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/vecinit-4.c7
7 files changed, 154 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 10b55b1..9fbdca0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2006-04-16 Roger Sayle <roger@eyesopen.com>
+ Andrew Pinski <pinskia@gcc.gnu.org>
+ Dale Johannesen <dalej@apple.com>
+
+ PR target/24076
+ * config/i386/i386.c (ix86_expand_vector_init_duplicate): Add
+ special case code to implement V8HImode and V16QImode with SSE2.
+
2006-04-15 Roger Sayle <roger@eyesopen.com>
* config/i386/i386.c (ix86_va_start): Ensure all integer constant
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4d3a972..3fc19bf 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17856,11 +17856,66 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
wvmode = V4HImode;
goto widen;
case V8HImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend HImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V8HImode vector. */
+ tmp1 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
+ /* Duplicate the low short through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+ /* Cast the V8HImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode back to V8HImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
+ return true;
+ }
smode = HImode;
wsmode = SImode;
wvmode = V4SImode;
goto widen;
case V16QImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend QImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V16QImode vector. */
+ tmp1 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
+ /* Duplicate the low byte through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ /* Cast the V16QImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode back to V16QImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
+ return true;
+ }
smode = QImode;
wsmode = HImode;
wvmode = V8HImode;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index dc960cf..313a7fc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,8 +1,16 @@
+2006-04-16 Roger Sayle <roger@eyesopen.com>
+ Dale Johannesen <dalej@apple.com>
+
+ PR target/24076
+ * gcc.target/i386/vecinit-3.c: New testcase.
+ * gcc.target/i386/vecinit-4.c: Likewise.
+ * gcc.target/i386/sse-18.c: Likewise.
+ * gcc.target/i386/sse-19.c: Likewise.
+
2006-04-16 Thomas Koenig <Thomas.Koenig@online.de>
* gfortran.dg/allocate_zerosize_1.f90: New test.
-
2006-04-16 Mark Mitchell <mark@codesourcery.com>
PR c++/26365
diff --git a/gcc/testsuite/gcc.target/i386/sse-18.c b/gcc/testsuite/gcc.target/i386/sse-18.c
new file mode 100644
index 0000000..61026a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-18.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -msse2" } */
+extern void abort(void);
+#include <emmintrin.h>
+#include "../../gcc.dg/i386-cpuid.h"
+__m128i foo (char) __attribute__((noinline));
+__m128i foo (char x) {
+ return _mm_set1_epi8(x);
+}
+__m128i bar (char) __attribute__((noinline));
+__m128i bar (char x) {
+ return _mm_set_epi8 (x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x);
+}
+
+int main (void) {
+ int i, j;
+ union u { __m128i v; char c[16]; };
+ union u x, y;
+ unsigned long cpu_facilities;
+ /* Query the host CPU feature bits before executing any SSE2 code. */
+ cpu_facilities = i386_cpuid ();
+ /* foo and bar use SSE2 intrinsics, so SSE2 itself must be present. */
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no SSE2 vector support, pass. */
+ return 0;
+ /* Both splat forms must agree in all 16 lanes for every char value. */
+ for (i = -128; i <= 127; i++)
+ {
+ x.v = foo ((char)i);
+ y.v = bar ((char)i);
+ for (j=0; j<16; j++)
+ if (x.c[j] != y.c[j])
+ abort();
+ }
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/sse-19.c b/gcc/testsuite/gcc.target/i386/sse-19.c
new file mode 100644
index 0000000..43c090b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-19.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2" } */
+/* { dg-final { scan-assembler "punpcklbw" } } */
+extern void abort();
+#include <emmintrin.h>
+__m128i foo (char) __attribute__((noinline));
+__m128i foo (char x) {
+ return _mm_set1_epi8(x);
+}
+__m128i bar (char) __attribute__((noinline));
+__m128i bar (char x) {
+ return _mm_set_epi8 (x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x);
+}
+
+main() {
+ int i, j;
+ union u { __m128i v; char c[16]; };
+ union u x, y;
+ for (i = -128; i <= 127; i++)
+ {
+ x.v = foo ((char)i);
+ y.v = bar ((char)i);
+ for (j=0; j<16; j++)
+ if (x.c[j] != y.c[j])
+ abort();
+ }
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/vecinit-3.c b/gcc/testsuite/gcc.target/i386/vecinit-3.c
new file mode 100644
index 0000000..4cbf521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vecinit-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+#define vector __attribute__((vector_size(16)))
+
+char a;
+vector char f(void) { return (vector char){ a, a, a, a, a, a, a, a,
+ a, a, a, a, a, a, a, a }; }
+/* { dg-final { scan-assembler-not "sall" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vecinit-4.c b/gcc/testsuite/gcc.target/i386/vecinit-4.c
new file mode 100644
index 0000000..7a8c1d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vecinit-4.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+#define vector __attribute__((vector_size(16)))
+
+short a;
+vector short f(void) { return (vector short){ a, a, a, a, a, a, a, a }; }
+/* { dg-final { scan-assembler-not "sall" } } */