about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2005-05-04 00:16:01 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org> 2005-05-04 00:16:01 +0200
commit 125886c749998b8f2bd8a49de7350755e92db718 (patch)
tree 10defcbd02d9f2143061c0f92dcb0585843ac362 /gcc
parent 21fae9376c1a132caa4eb51067f02bd81b8f91f2 (diff)
download gcc-125886c749998b8f2bd8a49de7350755e92db718.zip
gcc-125886c749998b8f2bd8a49de7350755e92db718.tar.gz
gcc-125886c749998b8f2bd8a49de7350755e92db718.tar.bz2
re PR rtl-optimization/21239 (Illegal elimination of SSE2 load/store using xmm intrinsics)
* config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
item in V4SF mode.

PR rtl-optimization/21239
* combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo.

* gcc.dg/i386-sse-11.c: New test.

From-SVN: r99186
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/combine.c 2
-rw-r--r-- gcc/config/i386/i386.c 21
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.dg/i386-sse-11.c 92
5 files changed, 118 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b565816..fa84ec7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2005-05-04 Jakub Jelinek <jakub@redhat.com>
+
+ * config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
+ item in V4SF mode.
+
+ PR rtl-optimization/21239
+ * combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo.
+
2005-05-03 Kazu Hirata <kazu@cs.umass.edu>
* tree-flow.h (tree_ann_common_d): Move aux to ...
diff --git a/gcc/combine.c b/gcc/combine.c
index d9e0b4f..251c1ee 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -4742,7 +4742,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
if (GET_CODE (op0) == VEC_CONCAT)
{
HOST_WIDE_INT op0_size = GET_MODE_SIZE (GET_MODE (XEXP (op0, 0)));
- if (op0_size < offset)
+ if (offset < op0_size)
op0 = XEXP (op0, 0);
else
{
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ea10926..49d22fd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17022,32 +17022,35 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
break;
case 1:
- /* tmp = op0 = A B C D */
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
-
- /* op0 = C C D D */
+ /* target = A A B B */
emit_insn (gen_sse_unpcklps (target, target, target));
-
- /* op0 = C C D X */
+ /* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
-
- /* op0 = A B X D */
+ /* target = A X C D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
case 2:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
case 3:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 53428bf..348b911 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR rtl-optimization/21239
+ * gcc.dg/i386-sse-11.c: New test.
+
2005-05-03 Alexandre Oliva <aoliva@redhat.com>
PR target/16888
diff --git a/gcc/testsuite/gcc.dg/i386-sse-11.c b/gcc/testsuite/gcc.dg/i386-sse-11.c
new file mode 100644
index 0000000..fd72047
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/i386-sse-11.c
@@ -0,0 +1,92 @@
+/* PR rtl-optimization/21239 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse2" } */
+#include <emmintrin.h>
+#include "i386-cpuid.h"
+
+extern void abort (void);
+
+void
+foo (unsigned int x, double *y, const double *z)
+{
+ __m128d tmp;
+ while (x)
+ {
+ tmp = _mm_load_sd (z);
+ _mm_store_sd (y, tmp);
+ --x; ++z; ++y;
+ }
+}
+
+void
+bar (unsigned int x, float *y, const float *z)
+{
+ __m128 tmp;
+ unsigned int i;
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { *z, 0, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 0);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, *z, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 1);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, 0, *z, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 2);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, 0, 0, *z };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 3);
+ ++z; ++y;
+ }
+}
+
+void __attribute__((noinline))
+run_tests (void)
+{
+ unsigned int i;
+ double a[16], b[16];
+ float c[16], d[16];
+ for (i = 0; i < 16; ++i)
+ {
+ a[i] = 1;
+ b[i] = 2;
+ c[i] = 3;
+ d[i] = 4;
+ }
+ foo (16, a, b);
+ bar (4, c, d);
+ for (i = 0; i < 16; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ unsigned long cpu_facilities;
+ unsigned int i;
+ double a[19], b[19];
+
+ cpu_facilities = i386_cpuid ();
+
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no vector support, pass. */
+ return 0;
+
+ run_tests ();
+ return 0;
+}