aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/testsuite/ChangeLog22
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-check.h20
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-crc32.h163
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c9
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c110
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c75
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c110
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c107
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c67
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c107
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h447
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h41
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c8
20 files changed, 1500 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 759dd3c..23d3294 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,25 @@
+2007-06-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/sse4_2-check.h: New.
+ * gcc.target/i386/sse4_2-crc32b.c: Likewise.
+ * gcc.target/i386/sse4_2-crc32.h: Likewise.
+ * gcc.target/i386/sse4_2-crc32l.c: Likewise.
+ * gcc.target/i386/sse4_2-crc32q.c: Likewise.
+ * gcc.target/i386/sse4_2-crc32w.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpestri-1.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpestri-2.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpestrm-1.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpestrm-2.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpgtq.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpistri-1.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpistri-2.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpistrm-1.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpistrm-2.c: Likewise.
+ * gcc.target/i386/sse4_2-pcmpstr.h: Likewise.
+ * gcc.target/i386/sse4_2-popcnt.h: Likewise.
+ * gcc.target/i386/sse4_2-popcntl.c: Likewise.
+ * gcc.target/i386/sse4_2-popcntq.c: Likewise.
+
2007-06-01 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/pie-link.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-check.h b/gcc/testsuite/gcc.target/i386/sse4_2-check.h
new file mode 100644
index 0000000..49bc0b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-check.h
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../../gcc.dg/i386-cpuid.h"
+
+static void sse4_2_test (void);
+
+int
+main ()
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid_ecx ();
+
+ /* Run SSE4.2 test only if host has SSE4.2 support. */
+ if ((cpu_facilities & bit_SSE4_2))
+ sse4_2_test ();
+
+ exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h b/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h
new file mode 100644
index 0000000..c0bcd16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h
@@ -0,0 +1,163 @@
+#include "sse4_2-check.h"
+
+#include <nmmintrin.h>
+#include <string.h>
+
+#define POLYNOMIAL 0x11EDC6F41LL
+
+#define MAX_BUF 16
+
+static void
+shift_mem_by1 (unsigned char* buf, int len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--)
+ {
+ buf[i] = buf[i] << 1;
+ if (i > 0 && (buf[i-1] & 0x80))
+ buf[i] |= 1;
+ }
+}
+
+static void
+do_div (unsigned char* buf, unsigned char* div)
+{
+ int i;
+ for (i = 0; i < 5; i++)
+ buf[i] ^= div[i];
+}
+
+static unsigned int
+calc_rem (unsigned char* buf, int len)
+{
+ union
+ {
+ unsigned long long ll;
+ unsigned char c[8];
+ } divisor;
+ union
+ {
+ unsigned int i;
+ unsigned char c[4];
+ } ret;
+ unsigned char *div_buf;
+ unsigned char divident[MAX_BUF];
+ int disp = len / 8;
+ int i;
+
+ divisor.ll = POLYNOMIAL << 7LL;
+
+ memcpy (divident, buf, disp);
+
+ div_buf = divident + disp - 5;
+
+ for (i = 0; i < len - 32; i++)
+ {
+ if ((div_buf[4] & 0x80))
+ do_div (div_buf, divisor.c);
+ shift_mem_by1 (divident, disp);
+ }
+
+ memcpy (ret.c, div_buf + 1, sizeof (ret));
+ return ret.i;
+}
+
+static void
+reverse_bits (unsigned char *src, int len)
+{
+ unsigned char buf[MAX_BUF];
+ unsigned char *tmp = buf + len - 1;
+ unsigned char ch;
+ int i, j;
+
+ for (i = 0; i < len; i++)
+ {
+ ch = 0;
+ for (j = 0; j < 8; j++)
+ if ((src[i] & (1 << j)))
+ ch |= 1 << (7 - j);
+ *tmp-- = ch;
+ }
+
+ for (i = 0; i < len; i++)
+ src[i] = buf[i];
+}
+
+static void
+shift_mem ( unsigned char *src, unsigned char *dst, int len, int shft)
+{
+ int disp = shft / 8;
+ int i;
+
+ memset (dst, 0, len + disp);
+ for (i = 0; i < len; i++)
+ dst[i + disp] = src[i];
+}
+
+static void
+xor_mem (unsigned char *src, unsigned char *dst, int len)
+{
+ int disp = len / 8;
+ int i;
+
+ for (i = 0; i < disp; i++)
+ dst[i] ^= src[i];
+}
+
+static DST_T
+compute_crc32 (DST_T crc, SRC_T inp)
+{
+ unsigned char crcbuf[sizeof (DST_T)];
+ unsigned char inbuf[sizeof (SRC_T)];
+ unsigned char tmp1[MAX_BUF], tmp2[MAX_BUF];
+ int crc_sh, xor_sz;
+ union
+ {
+ unsigned int i;
+ unsigned char c[4];
+ } ret;
+
+ crc_sh = sizeof (SRC_T) * 8;
+ xor_sz = 32 + crc_sh;
+ memcpy (crcbuf, &crc, sizeof (DST_T));
+ memcpy (inbuf, &inp, sizeof (SRC_T));
+
+ reverse_bits (crcbuf, 4);
+ reverse_bits (inbuf, sizeof (SRC_T));
+
+ shift_mem (inbuf, tmp1, sizeof (SRC_T), 32);
+ shift_mem (crcbuf, tmp2, 4, crc_sh);
+
+ xor_mem (tmp1, tmp2, xor_sz);
+
+ ret.i = calc_rem (tmp2, xor_sz);
+
+ reverse_bits (ret.c, 4);
+
+ return (DST_T)ret.i;
+}
+
+#define NUM 1024
+
+static void
+sse4_2_test (void)
+{
+ DST_T dst[NUM];
+ SRC_T src[NUM];
+ int i;
+
+ for (i = 0; i < NUM; i++)
+ {
+ dst[i] = rand ();
+ if (sizeof (DST_T) > 4)
+ dst[i] |= (DST_T)rand () << (DST_T)(sizeof (DST_T) * 4);
+ src[i] = rand ();
+ if (sizeof (SRC_T) > 4)
+ src[i] |= (SRC_T)rand () << (SRC_T)(sizeof (DST_T) * 4);
+ }
+
+ for (i = 0; i < NUM; i++)
+ if (CRC32 (dst[i], src[i]) != compute_crc32 (dst[i], src[i]))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c
new file mode 100644
index 0000000..debede3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c
@@ -0,0 +1,9 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define CRC32 _mm_crc32_u8
+#define DST_T unsigned int
+#define SRC_T unsigned char
+
+#include "sse4_2-crc32.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c
new file mode 100644
index 0000000..120101b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c
@@ -0,0 +1,9 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define CRC32 _mm_crc32_u32
+#define DST_T unsigned int
+#define SRC_T unsigned int
+
+#include "sse4_2-crc32.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c
new file mode 100644
index 0000000..b6350e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c
@@ -0,0 +1,9 @@
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define CRC32 _mm_crc32_u64
+#define DST_T unsigned long long
+#define SRC_T unsigned long long
+
+#include "sse4_2-crc32.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c
new file mode 100644
index 0000000..9671974
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c
@@ -0,0 +1,9 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define CRC32 _mm_crc32_u16
+#define DST_T unsigned int
+#define SRC_T unsigned short
+
+#include "sse4_2-crc32.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c
new file mode 100644
index 0000000..8f7a8e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c
@@ -0,0 +1,74 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_MOST_SIGNIFICANT)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ int res, correct, l1, l2;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ l1 = rand () % 18;
+ l2 = rand () % 18;
+
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0,
+ NULL);
+ break;
+
+ case 1:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1,
+ NULL);
+ break;
+
+ case 2:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2,
+ NULL);
+ break;
+
+ default:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3,
+ NULL);
+ break;
+ }
+
+ if (correct != res)
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c
new file mode 100644
index 0000000..6f5cdf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c
@@ -0,0 +1,110 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_MOST_SIGNIFICANT)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ int res, correct, correct_flags, l1, l2;
+ int flags, cf, zf, sf, of, af;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ l1 = rand () % 18;
+ l2 = rand () % 18;
+
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0,
+ &correct_flags);
+ break;
+
+ case 1:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1,
+ &correct_flags);
+ break;
+
+ case 2:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2,
+ &correct_flags);
+ break;
+
+ default:
+ res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3,
+ &correct_flags);
+ break;
+ }
+
+ if (correct != res)
+ abort ();
+
+ flags = 0;
+ if (cf)
+ flags |= CFLAG;
+ if (zf)
+ flags |= ZFLAG;
+ if (sf)
+ flags |= SFLAG;
+ if (of)
+ flags |= OFLAG;
+
+ if (flags != correct_flags
+ || (af && (cf || zf))
+ || (!af && !(cf || zf)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c
new file mode 100644
index 0000000..51bab00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c
@@ -0,0 +1,75 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_BIT_MASK)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ __m128i res, correct;
+ int l1, l2;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ l1 = rand () % 18;
+ l2 = rand () % 18;
+
+ switch((rand() % 4))
+ {
+ case 0:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0,
+ NULL);
+ break;
+
+ case 1:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1,
+ NULL);
+ break;
+
+ case 2:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2,
+ NULL);
+ break;
+
+ default:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3,
+ NULL);
+ break;
+ }
+
+ if (memcmp (&correct, &res, sizeof (res)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c
new file mode 100644
index 0000000..c3dd982
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c
@@ -0,0 +1,110 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_BIT_MASK)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ __m128i res, correct;
+ int correct_flags, l1, l2;
+ int flags, cf, zf, sf, of, af;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ l1 = rand () % 18;
+ l2 = rand () % 18;
+
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0,
+ &correct_flags);
+ break;
+
+ case 1:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1,
+ &correct_flags);
+ break;
+
+ case 2:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2,
+ &correct_flags);
+ break;
+
+ default:
+ res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
+ correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3,
+ &correct_flags);
+ break;
+ }
+
+ if (memcmp (&correct, &res, sizeof (res)))
+ abort ();
+
+ flags = 0;
+ if (cf)
+ flags |= CFLAG;
+ if (zf)
+ flags |= ZFLAG;
+ if (sf)
+ flags |= SFLAG;
+ if (of)
+ flags |= OFLAG;
+
+ if (flags != correct_flags
+ || (af && (cf || zf))
+ || (!af && !(cf || zf)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c
new file mode 100644
index 0000000..a0e7388
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+
+#include <nmmintrin.h>
+
+#define NUM 64
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM / 2];
+ long long ll[NUM];
+ } dst, src1, src2;
+ int i, sign = 1;
+ long long is_eq;
+
+ for (i = 0; i < NUM; i++)
+ {
+ src1.ll[i] = i * i * sign;
+ src2.ll[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < NUM; i += 2)
+ dst.x[i / 2] = _mm_cmpgt_epi64 (src1.x[i / 2], src2.x[i / 2]);
+
+ for (i = 0; i < NUM; i++)
+ {
+ is_eq = src1.ll[i] > src2.ll[i] ? 0xFFFFFFFFFFFFFFFFLL : 0LL;
+ if (is_eq != dst.ll[i])
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c
new file mode 100644
index 0000000..033c07c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_MOST_SIGNIFICANT)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ int res, correct;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL0);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL0, NULL);
+ break;
+
+ case 1:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL1);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL1, NULL);
+ break;
+
+ case 2:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL2);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL2, NULL);
+ break;
+
+ default:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL3);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL3, NULL);
+ break;
+ }
+
+ if (correct != res)
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c
new file mode 100644
index 0000000..d2379d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c
@@ -0,0 +1,107 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_MOST_SIGNIFICANT)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ int res, correct, correct_flags;
+ int flags, cf, zf, sf, of, af;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL0);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL0);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL0);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL0);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL0);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL0);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL0,
+ &correct_flags);
+ break;
+
+ case 1:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL1);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL1);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL1);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL1);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL1);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL1);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL1,
+ &correct_flags);
+ break;
+
+ case 2:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL2);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL2);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL2);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL2);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL2);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL2);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL2,
+ &correct_flags);
+ break;
+
+ default:
+ res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL3);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL3);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL3);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL3);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL3);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL3);
+ correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL3,
+ &correct_flags);
+ break;
+ }
+
+ if (correct != res)
+ abort ();
+
+ flags = 0;
+ if (cf)
+ flags |= CFLAG;
+ if (zf)
+ flags |= ZFLAG;
+ if (sf)
+ flags |= SFLAG;
+ if (of)
+ flags |= OFLAG;
+
+ if (flags != correct_flags
+ || (af && (cf || zf))
+ || (!af && !(cf || zf)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c
new file mode 100644
index 0000000..3d2ef24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c
@@ -0,0 +1,67 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_BIT_MASK)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
+
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ __m128i res, correct;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ switch((rand() % 4))
+ {
+ case 0:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL0);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL0, NULL);
+ break;
+
+ case 1:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL1);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL1, NULL);
+ break;
+
+ case 2:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL2);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL2, NULL);
+ break;
+
+ default:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL3);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL3, NULL);
+ break;
+ }
+
+ if (memcmp (&correct, &res, sizeof (res)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c
new file mode 100644
index 0000000..1890da5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c
@@ -0,0 +1,107 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#include "sse4_2-check.h"
+#include "sse4_2-pcmpstr.h"
+
+#define NUM 1024
+
+#define IMM_VAL0 \
+ (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
+#define IMM_VAL1 \
+ (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
+ | SIDD_BIT_MASK)
+#define IMM_VAL2 \
+ (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
+#define IMM_VAL3 \
+ (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
+ | SIDD_POSITIVE_POLARITY | SIDD_UNIT_MASK)
+
+static void
+sse4_2_test (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ char c[NUM *16];
+ } src1, src2;
+ __m128i res, correct;
+ int correct_flags;
+ int flags, cf, zf, sf, of, af;
+ int i;
+
+ for (i = 0; i < NUM *16; i++)
+ {
+ src1.c[i] = rand ();
+ src2.c[i] = rand ();
+ }
+
+ for (i = 0; i < NUM; i++)
+ {
+ switch ((rand () % 4))
+ {
+ case 0:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL0);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL0);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL0);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL0);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL0);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL0);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL0,
+ &correct_flags);
+ break;
+
+ case 1:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL1);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL1);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL1);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL1);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL1);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL1);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL1,
+ &correct_flags);
+ break;
+
+ case 2:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL2);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL2);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL2);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL2);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL2);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL2);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL2,
+ &correct_flags);
+ break;
+
+ default:
+ res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL3);
+ cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL3);
+ zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL3);
+ sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL3);
+ of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL3);
+ af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL3);
+ correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL3,
+ &correct_flags);
+ break;
+ }
+
+ if (memcmp (&correct, &res, sizeof (res)))
+ abort ();
+
+ flags = 0;
+ if (cf)
+ flags |= CFLAG;
+ if (zf)
+ flags |= ZFLAG;
+ if (sf)
+ flags |= SFLAG;
+ if (of)
+ flags |= OFLAG;
+
+ if (flags != correct_flags
+ || (af && (cf || zf))
+ || (!af && !(cf || zf)))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h
new file mode 100644
index 0000000..49eb4bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h
@@ -0,0 +1,447 @@
+#include <nmmintrin.h>
+#include <string.h>
+
+#define CFLAG 0x00000001
+#define ZFLAG 0x00000002
+#define SFLAG 0x00000004
+#define OFLAG 0x00000008
+#define AFLAG 0x00000010
+#define PFLAG 0x00000020
+
+#define PCMPSTR_EQ(X, Y, RES) \
+ { \
+ int __size = (sizeof (*X) ^ 3) * 8; \
+ int __i, __j; \
+ for (__i = 0; __i < __size; __i++) \
+ for (__j = 0; __j < __size; __j++) \
+ RES[__j][__i] = (X[__i] == Y[__j]); \
+ }
+
+#define PCMPSTR_RNG(X, Y, RES) \
+ { \
+ int __size = (sizeof (*X) ^ 3) * 8; \
+ int __i, __j; \
+ for (__j = 0; __j < __size; __j++) \
+ for (__i = 0; __i < __size - 1; __i += 2) \
+ { \
+ RES[__j][__i] = (Y[__j] >= X[__i]); \
+ RES[__j][__i+1] = (Y[__j] <= X[__i + 1]); \
+ } \
+ }
+
+static void
+override_invalid (unsigned char res[16][16], int la, int lb,
+ const int mode, int dim)
+{
+ int i, j;
+
+ for (j = 0; j < dim; j++)
+ for (i = 0; i < dim; i++)
+ if (i < la && j >= lb)
+ res[j][i] = 0;
+ else if (i >= la)
+ switch ((mode & 0x0C))
+ {
+ case SIDD_CMP_EQUAL_ANY:
+ case SIDD_CMP_RANGES:
+ res[j][i] = 0;
+ break;
+ case SIDD_CMP_EQUAL_EACH:
+ res[j][i] = (j >= lb) ? 1: 0;
+ break;
+ case SIDD_CMP_EQUAL_ORDERED:
+ res[j][i] = 1;
+ break;
+ }
+}
+
+static void
+calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
+ unsigned char res[16][16])
+{
+ union
+ {
+ __m128i x;
+ signed char sc[16];
+ unsigned char uc[16];
+ signed short ss[8];
+ unsigned short us[8];
+ } d, s;
+
+ d.x = a;
+ s.x = b;
+
+ switch ((mode & 3))
+ {
+ case SIDD_UBYTE_OPS:
+ if ((mode & 0x0C) == SIDD_CMP_RANGES)
+ {
+ PCMPSTR_RNG (d.uc, s.uc, res);
+ }
+ else
+ {
+ PCMPSTR_EQ (d.uc, s.uc, res);
+ }
+ break;
+ case SIDD_UWORD_OPS:
+ if ((mode & 0x0C) == SIDD_CMP_RANGES)
+ {
+ PCMPSTR_RNG (d.us, s.us, res);
+ }
+ else
+ {
+ PCMPSTR_EQ (d.us, s.us, res);
+ }
+ break;
+ case SIDD_SBYTE_OPS:
+ if ((mode & 0x0C) == SIDD_CMP_RANGES)
+ {
+ PCMPSTR_RNG (d.sc, s.sc, res);
+ }
+ else
+ {
+ PCMPSTR_EQ (d.sc, s.sc, res);
+ }
+ break;
+ case SIDD_SWORD_OPS:
+ if ((mode & 0x0C) == SIDD_CMP_RANGES)
+ {
+ PCMPSTR_RNG (d.ss, s.ss, res);
+ }
+ else
+ {
+ PCMPSTR_EQ (d.ss, s.ss, res);
+ }
+ break;
+ }
+
+ override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
+}
+
+static int
+calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
+{
+ unsigned char mtx[16][16];
+ int i, j, k, dim, res = 0;
+
+ memset (mtx, 0, sizeof (mtx));
+
+ dim = (mode & 1) == 0 ? 16 : 8;
+
+ if (la < 0)
+ la = -la;
+
+ if (lb < 0)
+ lb = -lb;
+
+ if (la > dim)
+ la = dim;
+
+ if (lb > dim)
+ lb = dim;
+
+ calc_matrix (a, la, b, lb, mode, mtx);
+
+ switch ((mode & 0x0C))
+ {
+ case SIDD_CMP_EQUAL_ANY:
+ for (i = 0; i < dim; i++)
+ for (j = 0; j < dim; j++)
+ if (mtx[i][j])
+ res |= (1 << i);
+ break;
+
+ case SIDD_CMP_RANGES:
+ for (i = 0; i < dim; i += 2)
+ for(j = 0; j < dim; j++)
+ if (mtx[j][i] && mtx[j][i+1])
+ res |= (1 << j);
+ break;
+
+ case SIDD_CMP_EQUAL_EACH:
+ for(i = 0; i < dim; i++)
+ if (mtx[i][i])
+ res |= (1 << i);
+ break;
+
+ case SIDD_CMP_EQUAL_ORDERED:
+ for(i = 0; i < dim; i++)
+ {
+ unsigned char val = 1;
+
+ for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
+ val &= mtx[k][j];
+
+ if (val)
+ res |= (1 << i);
+ else
+ res &= ~(1 << i);
+ }
+ break;
+ }
+
+ switch ((mode & 0x30))
+ {
+ case SIDD_POSITIVE_POLARITY:
+ case SIDD_MASKED_POSITIVE_POLARITY:
+ break;
+
+ case SIDD_NEGATIVE_POLARITY:
+ res ^= -1;
+ break;
+
+ case SIDD_MASKED_NEGATIVE_POLARITY:
+ for (i = 0; i < lb; i++)
+ if (res & (1 << i))
+ res &= ~(1 << i);
+ else
+ res |= (1 << i);
+ break;
+ }
+
+ return res & ((dim == 8) ? 0xFF : 0xFFFF);
+}
+
+static int
+cmp_flags (__m128i a, int la, __m128i b, int lb,
+ int mode, int res2, int is_implicit)
+{
+ int i;
+ int flags = 0;
+ int is_bytes_mode = (mode & 1) == 0;
+ union
+ {
+ __m128i x;
+ unsigned char uc[16];
+ unsigned short us[8];
+ } d, s;
+
+ d.x = a;
+ s.x = b;
+
+ /* CF: reset if (RES2 == 0), set otherwise. */
+ if (res2 != 0)
+ flags |= CFLAG;
+
+ if (is_implicit)
+ {
+ /* ZF: set if any byte/word of src xmm operand is null, reset
+ otherwise.
+ SF: set if any byte/word of dst xmm operand is null, reset
+ otherwise. */
+
+ if (is_bytes_mode)
+ {
+ for (i = 0; i < 16; i++)
+ {
+ if (s.uc[i] == 0)
+ flags |= ZFLAG;
+ if (d.uc[i] == 0)
+ flags |= SFLAG;
+ }
+ }
+ else
+ {
+ for (i = 0; i < 8; i++)
+ {
+ if (s.us[i] == 0)
+ flags |= ZFLAG;
+ if (d.us[i] == 0)
+ flags |= SFLAG;
+ }
+ }
+ }
+ else
+ {
+ /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
+ SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */
+ int max_ind = is_bytes_mode ? 16 : 8;
+
+ if (la < 0)
+ la = -la;
+ if (lb < 0)
+ lb = -lb;
+
+ if (lb < max_ind)
+ flags |= ZFLAG;
+ if (la < max_ind)
+ flags |= SFLAG;
+ }
+
+ /* OF: equal to RES2[0]. */
+ if ((res2 & 0x1))
+ flags |= OFLAG;
+
+ /* AF: Reset.
+ PF: Reset. */
+ return flags;
+}
+
+static int
+cmp_indexed (__m128i a, int la, __m128i b, int lb,
+ const int mode, int *res2)
+{
+ int i, ndx;
+ int dim = (mode & 1) == 0 ? 16 : 8;
+ int r2;
+
+ r2 = calc_res (a, la, b, lb, mode);
+
+ ndx = dim;
+ if ((mode & 0x40))
+ {
+ for (i = dim - 1; i >= 0; i--)
+ if (r2 & (1 << i))
+ {
+ ndx = i;
+ break;
+ }
+ }
+ else
+ {
+ for (i = 0; i < dim; i++)
+ if ((r2 & (1 << i)))
+ {
+ ndx = i;
+ break;
+ }
+ }
+
+ *res2 = r2;
+ return ndx;
+}
+
+static __m128i
+cmp_masked (__m128i a, int la, __m128i b, int lb,
+ const int mode, int *res2)
+{
+ union
+ {
+ __m128i x;
+ char c[16];
+ short s[8];
+ } ret;
+ int i;
+ int dim = (mode & 1) == 0 ? 16 : 8;
+ union
+ {
+ int i;
+ char c[4];
+ short s[2];
+ } r2;
+
+ r2.i = calc_res (a, la, b, lb, mode);
+
+ memset (&ret, 0, sizeof (ret));
+
+ if (mode & 0x40)
+ {
+ for (i = 0; i < dim; i++)
+ if (dim == 8)
+ ret.s [i] = (r2.i & (1 << i)) ? -1 : 0;
+ else
+ ret.c [i] = (r2.i & (1 << i)) ? -1 : 0;
+ }
+ else
+ {
+ if (dim == 16)
+ ret.s[0] = r2.s[0];
+ else
+ ret.c[0] = r2.c[0];
+ }
+
+ *res2 = r2.i;
+
+ return ret.x;
+}
+
+static int
+calc_str_len (__m128i a, const int mode)
+{
+ union
+ {
+ __m128i x;
+ char c[16];
+ short s[8];
+ } s;
+ int i;
+ int dim = (mode & 1) == 0 ? 16 : 8;
+
+ s.x = a;
+
+ if ((mode & 1))
+ {
+ for (i = 0; i < dim; i++)
+ if (s.s[i] == 0)
+ break;
+ }
+ else
+ {
+ for (i = 0; i < dim; i++)
+ if (s.c[i] == 0)
+ break;
+ }
+
+ return i;
+}
+
+static inline int
+cmp_ei (__m128i *a, int la, __m128i *b, int lb,
+ const int mode, int *flags)
+{
+ int res2;
+ int index = cmp_indexed (*a, la, *b, lb, mode, &res2);
+
+ if (flags != NULL)
+ *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
+
+ return index;
+}
+
+static inline int
+cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
+{
+ int la, lb;
+ int res2;
+ int index;
+
+ la = calc_str_len (*a, mode);
+ lb = calc_str_len (*b, mode);
+
+ index = cmp_indexed (*a, la, *b, lb, mode, &res2);
+
+ if (flags != NULL)
+ *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
+
+ return index;
+}
+
+static inline __m128i
+cmp_em (__m128i *a, int la, __m128i *b, int lb,
+ const int mode, int *flags )
+{
+ int res2;
+ __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
+
+ if (flags != NULL)
+ *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
+
+ return mask;
+}
+
+static inline __m128i
+cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
+{
+ int la, lb;
+ int res2;
+ __m128i mask;
+
+ la = calc_str_len (*a, mode);
+ lb = calc_str_len (*b, mode);
+
+ mask = cmp_masked (*a, la, *b, lb, mode, &res2);
+ if (flags != NULL)
+ *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
+
+ return mask;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h b/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h
new file mode 100644
index 0000000..ce06ba1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h
@@ -0,0 +1,41 @@
+#include "sse4_2-check.h"
+
+#include <nmmintrin.h>
+
+#define NUM 1024
+
+static int
+compute_popcnt (TYPE v)
+{
+ int ret;
+ int i;
+
+ ret = 0;
+ for (i = 0; i < sizeof(v) * 8; i++)
+ if ((v & ((TYPE)1 << (TYPE) i)))
+ ret++;
+
+ return ret;
+}
+
+static void
+sse4_2_test (void)
+{
+ int i;
+ TYPE vals[NUM];
+ TYPE res;
+
+ for (i = 0; i < NUM; i++)
+ {
+ vals[i] = rand ();
+ if (sizeof (TYPE) > 4)
+ vals[i] |= (TYPE)rand() << (TYPE)(sizeof (TYPE) * 4);
+ }
+
+ for (i=0; i < NUM; i++)
+ {
+ res = POPCNT (vals[i]);
+ if (res != compute_popcnt (vals[i]))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c b/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c
new file mode 100644
index 0000000..69a32b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c
@@ -0,0 +1,8 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define TYPE unsigned int
+#define POPCNT _mm_popcnt_u32
+
+#include "sse4_2-popcnt.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c b/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c
new file mode 100644
index 0000000..7847393
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c
@@ -0,0 +1,8 @@
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+
+#define TYPE unsigned long long
+#define POPCNT _mm_popcnt_u64
+
+#include "sse4_2-popcnt.h"