diff options
author | Jakub Jelinek <jakub@redhat.com> | 2011-04-14 23:30:37 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2011-04-14 23:30:37 +0200 |
commit | 09db7afe4f724be4582250bd68a0110a60896e95 (patch) | |
tree | d29b70a327471a0ddf109fe2ca38f6fa09cc3315 /gcc | |
parent | c59633d9da23d48847f3b1bc8f14772beffb1435 (diff) | |
download | gcc-09db7afe4f724be4582250bd68a0110a60896e95.zip gcc-09db7afe4f724be4582250bd68a0110a60896e95.tar.gz gcc-09db7afe4f724be4582250bd68a0110a60896e95.tar.bz2 |
re PR target/48605 (gcc.target/i386/sse4_1-insertps-2.c FAILs with -mtune=geode - instruction insertps with memory operands behaves differently)
PR target/48605
* config/i386/sse.md (sse4_1_insertps): If operands[2] is a MEM,
offset it as needed based on top 2 bits in operands[3], change
MEM mode to SFmode and mask those 2 bits away from operands[3].
* gcc.target/i386/sse4_1-insertps-3.c: New test.
* gcc.target/i386/sse4_1-insertps-4.c: New test.
* gcc.target/i386/avx-vinsertps-3.c: New test.
* gcc.target/i386/avx-vinsertps-4.c: New test.
From-SVN: r172458
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 21 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vinsertps-3.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vinsertps-4.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-insertps-3.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-insertps-4.c | 92 |
7 files changed, 146 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b60fdbc..a8b773f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2011-04-14 Jakub Jelinek <jakub@redhat.com> + + PR target/48605 + * config/i386/sse.md (sse4_1_insertps): If operands[2] is a MEM, + offset it as needed based on top 2 bits in operands[3], change + MEM mode to SFmode and mask those 2 bits away from operands[3]. + 2011-04-14 Nicola Pero <nicola.pero@meta-innovation.com> * c-parser.c (c_parser_objc_protocol_definition): Updated for diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c98d464..df5e216 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3572,9 +3572,24 @@ (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_INSERTPS))] "TARGET_SSE4_1" - "@ - insertps\t{%3, %2, %0|%0, %2, %3} - vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}" +{ + if (MEM_P (operands[2])) + { + unsigned count_s = INTVAL (operands[3]) >> 6; + if (count_s) + operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); + operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); + } + switch (which_alternative) + { + case 0: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 1: + return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + default: + gcc_unreachable (); + } +} [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ea105e5..0c37e53 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2011-04-14 Jakub Jelinek <jakub@redhat.com> + + PR target/48605 + * gcc.target/i386/sse4_1-insertps-3.c: New test. + * gcc.target/i386/sse4_1-insertps-4.c: New test. + * gcc.target/i386/avx-insertps-3.c: New test. + * gcc.target/i386/avx-insertps-4.c: New test. 
+ 2011-04-14 Georg-Johann Lay <avr@gjlay.de> * gcc.target/avr/torture/pr41885.c (dg-options): Add diff --git a/gcc/testsuite/gcc.target/i386/avx-vinsertps-3.c b/gcc/testsuite/gcc.target/i386/avx-vinsertps-3.c new file mode 100644 index 0000000..9397729 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vinsertps-3.c @@ -0,0 +1,8 @@ +/* { dg-do run { target ilp32 } } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mfpmath=sse -mavx -mtune=geode" } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-insertps-3.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-vinsertps-4.c b/gcc/testsuite/gcc.target/i386/avx-vinsertps-4.c new file mode 100644 index 0000000..527b070 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vinsertps-4.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mfpmath=sse -mavx" } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-insertps-4.c" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insertps-3.c b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-3.c new file mode 100644 index 0000000..75a8073 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-3.c @@ -0,0 +1,5 @@ +/* { dg-do run { target ilp32 } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1 -mtune=geode" } */ + +#include "sse4_1-insertps-2.c" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insertps-4.c b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-4.c new file mode 100644 index 0000000..30defca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-4.c @@ -0,0 +1,92 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> + +#define msk0 0x41 +#define msk1 0x90 +#define msk2 
0xe9 +#define msk3 0x70 + +#define msk4 0xFC +#define msk5 0x05 +#define msk6 0x0A +#define msk7 0x0F + +union + { + __m128 x; + float f[4]; + } val1; + +static void +TEST (void) +{ + union + { + __m128 x; + float f[4]; + } res[8], val2, tmp; + int masks[8]; + int i, j; + + val2.f[0] = 55.0; + val2.f[1] = 55.0; + val2.f[2] = 55.0; + val2.f[3] = 55.0; + + val1.f[0] = 1.; + val1.f[1] = 2.; + val1.f[2] = 3.; + val1.f[3] = 4.; + + asm volatile ("" : "+m" (val1)); + res[0].x = _mm_insert_ps (val2.x, val1.x, msk0); + asm volatile ("" : "+m" (val1)); + res[1].x = _mm_insert_ps (val2.x, val1.x, msk1); + asm volatile ("" : "+m" (val1)); + res[2].x = _mm_insert_ps (val2.x, val1.x, msk2); + asm volatile ("" : "+m" (val1)); + res[3].x = _mm_insert_ps (val2.x, val1.x, msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for (i = 0; i < 4; i++) + { + asm volatile ("" : "+m" (val1)); + res[i + 4].x = _mm_insert_ps (val2.x, val1.x, msk4); + } + + masks[4] = msk4; + masks[5] = msk4; + masks[6] = msk4; + masks[7] = msk4; + + for (i=0; i < 8; i++) + { + tmp = val2; + tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6]; + + for (j = 0; j < 4; j++) + if (masks[i] & (0x1 << j)) + tmp.f[j] = 0.f; + + if (memcmp (&res[i], &tmp, sizeof (tmp))) + abort (); + } +} |