diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2008-03-30 21:13:33 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2008-03-30 14:13:33 -0700 |
commit | 23594c97b3ec375024937db9491f82c0f07e1b4b (patch) | |
tree | 0b45e84e519da8194c4e2bcafac2c49309fb4e3e /gcc | |
parent | e14c931f31a05f6e1bacbdde9d8d87033e8dc093 (diff) | |
download | gcc-23594c97b3ec375024937db9491f82c0f07e1b4b.zip gcc-23594c97b3ec375024937db9491f82c0f07e1b4b.tar.gz gcc-23594c97b3ec375024937db9491f82c0f07e1b4b.tar.bz2 |
re PR target/35757 (Incorrect constraint on sse4_1_blendp<ssemodesuffixf2c>)
gcc/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35757
* config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
proper error message for the third argument on blendpd and
blendps.
* config/i386/sse.md (blendbits): New.
(sse4_1_blendp<ssemodesuffixf2c>): Use it.
gcc/testsuite/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35757
* gcc.target/i386/sse4_1-blendps-2.c: New.
* gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
From-SVN: r133736
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 5 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c | 77 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c | 79 |
6 files changed, 181 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4d954bf..a0e29cc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2008-03-30 H.J. Lu <hongjiu.lu@intel.com> + + PR target/35757 + * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue + proper error message for the third argument on blendpd and + blendps. + + * config/i386/sse.md (blendbits): New. + (sse4_1_blendp<ssemodesuffixf2c>): Use it. + 2008-03-30 Eric Botcazou <ebotcazou@adacore.com> * fold-const.c (fold_binary) <BIT_IOR_EXPR>: Add missing conversions. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4fc8fcd..db593a6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19738,9 +19738,14 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp, case CODE_FOR_sse4_1_roundsd: case CODE_FOR_sse4_1_roundss: + case CODE_FOR_sse4_1_blendps: error ("the third argument must be a 4-bit immediate"); return const0_rtx; + case CODE_FOR_sse4_1_blendpd: + error ("the third argument must be a 2-bit immediate"); + return const0_rtx; + default: error ("the third argument must be an 8-bit immediate"); return const0_rtx; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 2ac9fb1..ad17209 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -53,6 +53,9 @@ ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")]) +;; Mapping of immediate bits for blend instructions +(define_mode_attr blendbits [(V4SF "15") (V2DF "3")]) + ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -6283,7 +6286,7 @@ (vec_merge:SSEMODEF2P (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SI 3 "const_0_to_3_operand" "n")))] + (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] "TARGET_SSE4_1" "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemov") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c6a8b3c..3f8205a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2008-03-30 H.J. Lu <hongjiu.lu@intel.com> + + PR target/35757 + * gcc.target/i386/sse4_1-blendps-2.c: New. + * gcc.target/i386/sse4_1-pblendw-2.c: Likewise. + 2008-03-30 Thomas Koenig <tkoenig@gcc.gnu.org> * gfortran.dg/internal_pack_1.f90: Added complex to test case. diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c new file mode 100644 index 0000000..b66bbfd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#undef MASK +#define MASK 0xe + +static void +init_blendps (float *src1, float *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 4; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendps (__m128 *dst, float *src1, float *src2) +{ + float tmp[4]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 4; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128 x, y; + union + { + __m128 x[NUM]; + float f[NUM * 4]; + } dst, src1, src2; + union + { + __m128 x; + float f[4]; + } src3; + int i; + + init_blendps 
(src1.f, src2.f); + + /* Check blendps imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) + abort (); + } + + /* Check blendps imm8, xmm, xmm */ + x = _mm_blend_ps (dst.x[2], src3.x, MASK); + y = _mm_blend_ps (src3.x, dst.x[2], MASK); + + if (check_blendps (&x, &dst.f[8], &src3.f[0])) + abort (); + + if (check_blendps (&y, &src3.f[0], &dst.f[8])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c new file mode 100644 index 0000000..eecc6ed --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c @@ -0,0 +1,79 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#undef MASK +#define MASK 0xfe + +static void +init_pblendw (short *src1, short *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 8; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_pblendw (__m128i *dst, short *src1, short *src2) +{ + short tmp[8]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 8; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128i x, y; + union + { + __m128i x[NUM]; + short s[NUM * 8]; + } dst, src1, src2; + union + { + __m128i x; + short s[8]; + } src3; + int i; + + init_pblendw (src1.s, src2.s); + + /* Check pblendw imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK); + if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8])) + abort (); + } + + /* Check pblendw imm8, xmm, xmm */ + src3.x = _mm_setzero_si128 (); + + x = _mm_blend_epi16 (dst.x[2], src3.x, MASK); + y = _mm_blend_epi16 (src3.x, 
dst.x[2], MASK); + + if (check_pblendw (&x, &dst.s[16], &src3.s[0])) + abort (); + + if (check_pblendw (&y, &src3.s[0], &dst.s[16])) + abort (); +} |