aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2008-03-30 21:13:33 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2008-03-30 14:13:33 -0700
commit23594c97b3ec375024937db9491f82c0f07e1b4b (patch)
tree0b45e84e519da8194c4e2bcafac2c49309fb4e3e
parente14c931f31a05f6e1bacbdde9d8d87033e8dc093 (diff)
downloadgcc-23594c97b3ec375024937db9491f82c0f07e1b4b.zip
gcc-23594c97b3ec375024937db9491f82c0f07e1b4b.tar.gz
gcc-23594c97b3ec375024937db9491f82c0f07e1b4b.tar.bz2
re PR target/35757 (Incorrect constraint on sse4_1_blendp<ssemodesuffixf2c>)
gcc/ 2008-03-30 H.J. Lu <hongjiu.lu@intel.com> PR target/35757 * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue proper error message for the third argument on blendpd and blendps. * config/i386/sse.md (blendbits): New. (sse4_1_blendp<ssemodesuffixf2c>): Use it. gcc/testsuite/ 2008-03-30 H.J. Lu <hongjiu.lu@intel.com> PR target/35757 * gcc.target/i386/sse4_1-blendps-2.c: New. * gcc.target/i386/sse4_1-pblendw-2.c: Likewise. From-SVN: r133736
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/config/i386/i386.c5
-rw-r--r--gcc/config/i386/sse.md5
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c79
6 files changed, 181 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4d954bf..a0e29cc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/35757
+ * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
+ proper error message for the third argument on blendpd and
+ blendps.
+
+ * config/i386/sse.md (blendbits): New.
+ (sse4_1_blendp<ssemodesuffixf2c>): Use it.
+
2008-03-30 Eric Botcazou <ebotcazou@adacore.com>
* fold-const.c (fold_binary) <BIT_IOR_EXPR>: Add missing conversions.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4fc8fcd..db593a6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19738,9 +19738,14 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
error ("the third argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the third argument must be a 2-bit immediate");
+ return const0_rtx;
+
default:
error ("the third argument must be an 8-bit immediate");
return const0_rtx;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2ac9fb1..ad17209 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -53,6 +53,9 @@
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -6283,7 +6286,7 @@
(vec_merge:SSEMODEF2P
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
(match_operand:SSEMODEF2P 1 "register_operand" "0")
- (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
"TARGET_SSE4_1"
"blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c6a8b3c..3f8205a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/35757
+ * gcc.target/i386/sse4_1-blendps-2.c: New.
+ * gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
+
2008-03-30 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran.dg/internal_pack_1.f90: Added complex to test case.
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
new file mode 100644
index 0000000..b66bbfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xe
+
+/* Fill SRC1 and SRC2 (NUM * 4 floats each) with test data whose sign
+   alternates from one element to the next, starting positive:
+   SRC1[k] = +/- k * k and SRC2[k] = +/- (k + 20).  */
+static void
+init_blendps (float *src1, float *src2)
+{
+  int k;
+
+  for (k = 0; k < NUM * 4; k++)
+    {
+      /* Even elements are positive, odd elements negative.  */
+      int sgn = (k & 1) ? -1 : 1;
+
+      src1[k] = k * k * sgn;
+      src2[k] = (k + 20) * sgn;
+    }
+}
+
+/* Build the expected blend of the four floats at SRC1 and SRC2 -- element
+   LANE is taken from SRC2 when bit LANE of MASK is set, from SRC1
+   otherwise -- and compare it bytewise with *DST.  Return the memcmp
+   result, i.e. 0 when they match.  */
+static int
+check_blendps (__m128 *dst, float *src1, float *src2)
+{
+  float expected[4];
+  int lane;
+
+  /* Start from SRC1, then overwrite the lanes selected by MASK.  */
+  memcpy (expected, src1, sizeof (expected));
+  for (lane = 0; lane < 4; lane++)
+    {
+      if (((MASK >> lane) & 1) == 0)
+        continue;
+      expected[lane] = src2[lane];
+    }
+
+  return memcmp (dst, expected, sizeof (expected));
+}
+
+/* Exercise _mm_blend_ps with the immediate MASK: first on NUM vector
+   pairs taken from the src1/src2 arrays, then on dst.x[2] and a zeroed
+   local vector with the operands in both orders.  Every result is
+   verified with check_blendps; any mismatch calls abort.  */
+static void
+sse4_1_test (void)
+{
+  __m128 x, y;
+  union
+    {
+      __m128 x[NUM];
+      float f[NUM * 4];
+    } dst, src1, src2;
+  union
+    {
+      __m128 x;
+      float f[4];
+    } src3;
+  int i;
+
+  init_blendps (src1.f, src2.f);
+
+  /* Check blendps imm8, m128, xmm */
+  for (i = 0; i < NUM; i++)
+    {
+      dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
+      if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
+        abort ();
+    }
+
+  /* Check blendps imm8, xmm, xmm */
+  /* Give src3 a defined value: it is read by both blends and by both
+     checks below.  */
+  src3.x = _mm_setzero_ps ();
+
+  x = _mm_blend_ps (dst.x[2], src3.x, MASK);
+  y = _mm_blend_ps (src3.x, dst.x[2], MASK);
+
+  /* dst.f[8] is the first float of dst.x[2].  */
+  if (check_blendps (&x, &dst.f[8], &src3.f[0]))
+    abort ();
+
+  if (check_blendps (&y, &src3.f[0], &dst.f[8]))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c
new file mode 100644
index 0000000..eecc6ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xfe
+
+/* Fill SRC1 and SRC2 (NUM * 8 shorts each) with test data whose sign
+   alternates from one element to the next, starting positive:
+   SRC1[k] = +/- k * k and SRC2[k] = +/- (k + 20).  */
+static void
+init_pblendw (short *src1, short *src2)
+{
+  int k;
+
+  for (k = 0; k < NUM * 8; k++)
+    {
+      /* Even elements are positive, odd elements negative.  */
+      int sgn = (k & 1) ? -1 : 1;
+
+      src1[k] = k * k * sgn;
+      src2[k] = (k + 20) * sgn;
+    }
+}
+
+/* Build the expected blend of the eight shorts at SRC1 and SRC2 -- element
+   LANE is taken from SRC2 when bit LANE of MASK is set, from SRC1
+   otherwise -- and compare it bytewise with *DST.  Return the memcmp
+   result, i.e. 0 when they match.  */
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{
+  short expected[8];
+  int lane;
+
+  /* Start from SRC1, then overwrite the lanes selected by MASK.  */
+  memcpy (expected, src1, sizeof (expected));
+  for (lane = 0; lane < 8; lane++)
+    {
+      if (((MASK >> lane) & 1) == 0)
+        continue;
+      expected[lane] = src2[lane];
+    }
+
+  return memcmp (dst, expected, sizeof (expected));
+}
+
+/* Exercise _mm_blend_epi16 with the immediate MASK: first on NUM vector
+   pairs taken from the src1/src2 arrays, then on dst.x[2] and a zeroed
+   local vector with the operands in both orders.  Every result is
+   verified with check_pblendw; any mismatch calls abort.  */
+static void
+sse4_1_test (void)
+{
+  __m128i blend_fwd, blend_rev;
+  union
+    {
+      __m128i x[NUM];
+      short s[NUM * 8];
+    } dst, src1, src2;
+  union
+    {
+      __m128i x;
+      short s[8];
+    } src3;
+  int n;
+
+  init_pblendw (src1.s, src2.s);
+
+  /* Check pblendw imm8, m128, xmm */
+  for (n = 0; n < NUM; n++)
+    {
+      short *lhs = &src1.s[n * 8];
+      short *rhs = &src2.s[n * 8];
+
+      dst.x[n] = _mm_blend_epi16 (src1.x[n], src2.x[n], MASK);
+      if (check_pblendw (&dst.x[n], lhs, rhs) != 0)
+        abort ();
+    }
+
+  /* Check pblendw imm8, xmm, xmm */
+  src3.x = _mm_setzero_si128 ();
+
+  blend_fwd = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
+  blend_rev = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+  /* dst.s[2 * 8] is the first short of dst.x[2].  The second check is
+     only evaluated when the first one passes.  */
+  if (check_pblendw (&blend_fwd, &dst.s[2 * 8], &src3.s[0])
+      || check_pblendw (&blend_rev, &src3.s[0], &dst.s[2 * 8]))
+    abort ();
+}