diff options
20 files changed, 1500 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 759dd3c..23d3294 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,25 @@ +2007-06-02 H.J. Lu <hongjiu.lu@intel.com> + + * gcc.target/i386/sse4_2-check.h: New. + * gcc.target/i386/sse4_2-crc32b.c: Likewise. + * gcc.target/i386/sse4_2-crc32.h: Likewise. + * gcc.target/i386/sse4_2-crc32l.c: Likewise. + * gcc.target/i386/sse4_2-crc32q.c: Likewise. + * gcc.target/i386/sse4_2-crc32w.c: Likewise. + * gcc.target/i386/sse4_2-pcmpestri-1.c: Likewise. + * gcc.target/i386/sse4_2-pcmpestri-2.c: Likewise. + * gcc.target/i386/sse4_2-pcmpestrm-1.c: Likewise. + * gcc.target/i386/sse4_2-pcmpestrm-2.c: Likewise. + * gcc.target/i386/sse4_2-pcmpgtq.c: Likewise. + * gcc.target/i386/sse4_2-pcmpistri-1.c: Likewise. + * gcc.target/i386/sse4_2-pcmpistri-2.c: Likewise. + * gcc.target/i386/sse4_2-pcmpistrm-1.c: Likewise. + * gcc.target/i386/sse4_2-pcmpistrm-2.c: Likewise. + * gcc.target/i386/sse4_2-pcmpstr.h: Likewise. + * gcc.target/i386/sse4_2-popcnt.h: Likewise. + * gcc.target/i386/sse4_2-popcntl.c: Likewise. + * gcc.target/i386/sse4_2-popcntq.c: Likewise. + 2007-06-01 Geoffrey Keating <geoffk@apple.com> * gcc.dg/pie-link.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-check.h b/gcc/testsuite/gcc.target/i386/sse4_2-check.h new file mode 100644 index 0000000..49bc0b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-check.h @@ -0,0 +1,20 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "../../gcc.dg/i386-cpuid.h" + +static void sse4_2_test (void); + +int +main () +{ + unsigned long cpu_facilities; + + cpu_facilities = i386_cpuid_ecx (); + + /* Run SSE4.2 test only if host has SSE4.2 support. 
*/ + if ((cpu_facilities & bit_SSE4_2)) + sse4_2_test (); + + exit (0); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h b/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h new file mode 100644 index 0000000..c0bcd16 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32.h @@ -0,0 +1,163 @@ +#include "sse4_2-check.h" + +#include <nmmintrin.h> +#include <string.h> + +#define POLYNOMIAL 0x11EDC6F41LL + +#define MAX_BUF 16 + +static void +shift_mem_by1 (unsigned char* buf, int len) +{ + int i; + + for (i = len - 1; i >= 0; i--) + { + buf[i] = buf[i] << 1; + if (i > 0 && (buf[i-1] & 0x80)) + buf[i] |= 1; + } +} + +static void +do_div (unsigned char* buf, unsigned char* div) +{ + int i; + for (i = 0; i < 5; i++) + buf[i] ^= div[i]; +} + +static unsigned int +calc_rem (unsigned char* buf, int len) +{ + union + { + unsigned long long ll; + unsigned char c[8]; + } divisor; + union + { + unsigned int i; + unsigned char c[4]; + } ret; + unsigned char *div_buf; + unsigned char divident[MAX_BUF]; + int disp = len / 8; + int i; + + divisor.ll = POLYNOMIAL << 7LL; + + memcpy (divident, buf, disp); + + div_buf = divident + disp - 5; + + for (i = 0; i < len - 32; i++) + { + if ((div_buf[4] & 0x80)) + do_div (div_buf, divisor.c); + shift_mem_by1 (divident, disp); + } + + memcpy (ret.c, div_buf + 1, sizeof (ret)); + return ret.i; +} + +static void +reverse_bits (unsigned char *src, int len) +{ + unsigned char buf[MAX_BUF]; + unsigned char *tmp = buf + len - 1; + unsigned char ch; + int i, j; + + for (i = 0; i < len; i++) + { + ch = 0; + for (j = 0; j < 8; j++) + if ((src[i] & (1 << j))) + ch |= 1 << (7 - j); + *tmp-- = ch; + } + + for (i = 0; i < len; i++) + src[i] = buf[i]; +} + +static void +shift_mem ( unsigned char *src, unsigned char *dst, int len, int shft) +{ + int disp = shft / 8; + int i; + + memset (dst, 0, len + disp); + for (i = 0; i < len; i++) + dst[i + disp] = src[i]; +} + +static void +xor_mem (unsigned char *src, unsigned char *dst, int len) +{ + int 
disp = len / 8; + int i; + + for (i = 0; i < disp; i++) + dst[i] ^= src[i]; +} + +static DST_T +compute_crc32 (DST_T crc, SRC_T inp) +{ + unsigned char crcbuf[sizeof (DST_T)]; + unsigned char inbuf[sizeof (SRC_T)]; + unsigned char tmp1[MAX_BUF], tmp2[MAX_BUF]; + int crc_sh, xor_sz; + union + { + unsigned int i; + unsigned char c[4]; + } ret; + + crc_sh = sizeof (SRC_T) * 8; + xor_sz = 32 + crc_sh; + memcpy (crcbuf, &crc, sizeof (DST_T)); + memcpy (inbuf, &inp, sizeof (SRC_T)); + + reverse_bits (crcbuf, 4); + reverse_bits (inbuf, sizeof (SRC_T)); + + shift_mem (inbuf, tmp1, sizeof (SRC_T), 32); + shift_mem (crcbuf, tmp2, 4, crc_sh); + + xor_mem (tmp1, tmp2, xor_sz); + + ret.i = calc_rem (tmp2, xor_sz); + + reverse_bits (ret.c, 4); + + return (DST_T)ret.i; +} + +#define NUM 1024 + +static void +sse4_2_test (void) +{ + DST_T dst[NUM]; + SRC_T src[NUM]; + int i; + + for (i = 0; i < NUM; i++) + { + dst[i] = rand (); + if (sizeof (DST_T) > 4) + dst[i] |= (DST_T)rand () << (DST_T)(sizeof (DST_T) * 4); /* fill the upper half of a wide DST_T */ + src[i] = rand (); + if (sizeof (SRC_T) > 4) + src[i] |= (SRC_T)rand () << (SRC_T)(sizeof (SRC_T) * 4); /* fill the upper half of a wide SRC_T */ + } + + for (i = 0; i < NUM; i++) + if (CRC32 (dst[i], src[i]) != compute_crc32 (dst[i], src[i])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c new file mode 100644 index 0000000..debede3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32b.c @@ -0,0 +1,9 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#define CRC32 _mm_crc32_u8 +#define DST_T unsigned int +#define SRC_T unsigned char + +#include "sse4_2-crc32.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c new file mode 100644 index 0000000..120101b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32l.c @@ -0,0 +1,9 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* 
{ dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#define CRC32 _mm_crc32_u32 +#define DST_T unsigned int +#define SRC_T unsigned int + +#include "sse4_2-crc32.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c new file mode 100644 index 0000000..b6350e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32q.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#define CRC32 _mm_crc32_u64 +#define DST_T unsigned long long +#define SRC_T unsigned long long + +#include "sse4_2-crc32.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c b/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c new file mode 100644 index 0000000..9671974 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-crc32w.c @@ -0,0 +1,9 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#define CRC32 _mm_crc32_u16 +#define DST_T unsigned int +#define SRC_T unsigned short + +#include "sse4_2-crc32.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c new file mode 100644 index 0000000..8f7a8e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-1.c @@ -0,0 +1,74 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_MOST_SIGNIFICANT) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | 
SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT) + + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + int res, correct, l1, l2; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + l1 = rand () % 18; + l2 = rand () % 18; + + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0, + NULL); + break; + + case 1: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1, + NULL); + break; + + case 2: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2, + NULL); + break; + + default: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3, + NULL); + break; + } + + if (correct != res) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c new file mode 100644 index 0000000..6f5cdf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestri-2.c @@ -0,0 +1,110 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_MOST_SIGNIFICANT) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT) + + +static void 
+sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + int res, correct, correct_flags, l1, l2; + int flags, cf, zf, sf, of, af; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + l1 = rand () % 18; + l2 = rand () % 18; + + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0, + &correct_flags); + break; + + case 1: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1, + &correct_flags); + break; + + case 2: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2, + &correct_flags); + break; + + default: + res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + sf = 
_mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3, + &correct_flags); + break; + } + + if (correct != res) + abort (); + + flags = 0; + if (cf) + flags |= CFLAG; + if (zf) + flags |= ZFLAG; + if (sf) + flags |= SFLAG; + if (of) + flags |= OFLAG; + + if (flags != correct_flags + || (af && (cf || zf)) + || (!af && !(cf || zf))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c new file mode 100644 index 0000000..51bab00 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-1.c @@ -0,0 +1,75 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_BIT_MASK) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK) + + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + __m128i res, correct; + int l1, l2; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + l1 = rand () % 18; + l2 = rand () % 18; + + switch((rand() % 4)) + { + case 0: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0, + NULL); + break; + + case 1: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + correct = cmp_em 
(&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1, + NULL); + break; + + case 2: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2, + NULL); + break; + + default: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3, + NULL); + break; + } + + if (memcmp (&correct, &res, sizeof (res))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c new file mode 100644 index 0000000..c3dd982 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpestrm-2.c @@ -0,0 +1,110 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_BIT_MASK) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK) + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + __m128i res, correct; + int correct_flags, l1, l2; + int flags, cf, zf, sf, of, af; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + l1 = rand () % 18; + l2 = rand () % 18; + + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + of = _mm_cmpestro (src1.x[i], l1, 
src2.x[i], l2, IMM_VAL0); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0, + &correct_flags); + break; + + case 1: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1, + &correct_flags); + break; + + case 2: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2, + &correct_flags); + break; + + default: + res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); + correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3, + &correct_flags); + break; + } + + if (memcmp (&correct, &res, sizeof (res))) + abort (); + + flags = 0; + if (cf) + flags |= CFLAG; + if (zf) + flags |= ZFLAG; + if (sf) + flags |= SFLAG; + if (of) + flags |= OFLAG; + + if (flags != correct_flags + || (af && (cf || zf)) + || (!af && !(cf || zf))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c 
b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c new file mode 100644 index 0000000..a0e7388 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpgtq.c @@ -0,0 +1,38 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" + +#include <nmmintrin.h> + +#define NUM 64 + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst, src1, src2; + int i, sign = 1; + long long is_gt; /* expected lane: all ones when src1 > src2, else zero */ + + for (i = 0; i < NUM; i++) + { + src1.ll[i] = i * i * sign; + src2.ll[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x[i / 2] = _mm_cmpgt_epi64 (src1.x[i / 2], src2.x[i / 2]); + + for (i = 0; i < NUM; i++) + { + is_gt = src1.ll[i] > src2.ll[i] ? -1LL : 0LL; + if (is_gt != dst.ll[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c new file mode 100644 index 0000000..033c07c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-1.c @@ -0,0 +1,67 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_MOST_SIGNIFICANT) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT) + + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + int res, correct; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + 
for (i = 0; i < NUM; i++) + { + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL0); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL0, NULL); + break; + + case 1: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL1); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL1, NULL); + break; + + case 2: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL2); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL2, NULL); + break; + + default: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL3); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL3, NULL); + break; + } + + if (correct != res) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c new file mode 100644 index 0000000..d2379d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistri-2.c @@ -0,0 +1,107 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_MOST_SIGNIFICANT) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT) + + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + int res, correct, correct_flags; + int flags, cf, zf, sf, of, af; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL0); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL0); + zf = 
_mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL0); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL0); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL0); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL0); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL0, + &correct_flags); + break; + + case 1: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL1); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL1); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL1); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL1); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL1); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL1); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL1, + &correct_flags); + break; + + case 2: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL2); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL2); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL2); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL2); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL2); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL2); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL2, + &correct_flags); + break; + + default: + res = _mm_cmpistri (src1.x[i], src2.x[i], IMM_VAL3); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL3); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL3); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL3); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL3); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL3); + correct = cmp_ii (&src1.x[i], &src2.x[i], IMM_VAL3, + &correct_flags); + break; + } + + if (correct != res) + abort (); + + flags = 0; + if (cf) + flags |= CFLAG; + if (zf) + flags |= ZFLAG; + if (sf) + flags |= SFLAG; + if (of) + flags |= OFLAG; + + if (flags != correct_flags + || (af && (cf || zf)) + || (!af && !(cf || zf))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c new file mode 100644 index 0000000..3d2ef24 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-1.c @@ -0,0 +1,67 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include "sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_BIT_MASK) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK) + + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + __m128i res, correct; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + switch((rand() % 4)) + { + case 0: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL0); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL0, NULL); + break; + + case 1: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL1); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL1, NULL); + break; + + case 2: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL2); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL2, NULL); + break; + + default: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL3); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL3, NULL); + break; + } + + if (memcmp (&correct, &res, sizeof (res))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c new file mode 100644 index 0000000..1890da5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpistrm-2.c @@ -0,0 +1,107 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.2" } */ + +#include 
"sse4_2-check.h" +#include "sse4_2-pcmpstr.h" + +#define NUM 1024 + +#define IMM_VAL0 \ + (SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY) +#define IMM_VAL1 \ + (SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \ + | SIDD_BIT_MASK) +#define IMM_VAL2 \ + (SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY) +#define IMM_VAL3 \ + (SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \ + | SIDD_POSITIVE_POLARITY | SIDD_UNIT_MASK) + +static void +sse4_2_test (void) +{ + union + { + __m128i x[NUM]; + char c[NUM *16]; + } src1, src2; + __m128i res, correct; + int correct_flags; + int flags, cf, zf, sf, of, af; + int i; + + for (i = 0; i < NUM *16; i++) + { + src1.c[i] = rand (); + src2.c[i] = rand (); + } + + for (i = 0; i < NUM; i++) + { + switch ((rand () % 4)) + { + case 0: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL0); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL0); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL0); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL0); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL0); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL0); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL0, + &correct_flags); + break; + + case 1: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL1); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL1); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL1); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL1); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL1); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL1); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL1, + &correct_flags); + break; + + case 2: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL2); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL2); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL2); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL2); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL2); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL2); + correct = cmp_im 
(&src1.x[i], &src2.x[i], IMM_VAL2, + &correct_flags); + break; + + default: + res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM_VAL3); + cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM_VAL3); + zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM_VAL3); + sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM_VAL3); + of = _mm_cmpistro (src1.x[i], src2.x[i], IMM_VAL3); + af = _mm_cmpistra (src1.x[i], src2.x[i], IMM_VAL3); + correct = cmp_im (&src1.x[i], &src2.x[i], IMM_VAL3, + &correct_flags); + break; + } + + if (memcmp (&correct, &res, sizeof (res))) + abort (); + + flags = 0; + if (cf) + flags |= CFLAG; + if (zf) + flags |= ZFLAG; + if (sf) + flags |= SFLAG; + if (of) + flags |= OFLAG; + + if (flags != correct_flags + || (af && (cf || zf)) + || (!af && !(cf || zf))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h new file mode 100644 index 0000000..49eb4bd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_2-pcmpstr.h @@ -0,0 +1,447 @@ +#include <nmmintrin.h> +#include <string.h> + +#define CFLAG 0x00000001 +#define ZFLAG 0x00000002 +#define SFLAG 0x00000004 +#define OFLAG 0x00000008 +#define AFLAG 0x00000010 +#define PFLAG 0x00000020 + +#define PCMPSTR_EQ(X, Y, RES) \ + { \ + int __size = (sizeof (*X) ^ 3) * 8; \ + int __i, __j; \ + for (__i = 0; __i < __size; __i++) \ + for (__j = 0; __j < __size; __j++) \ + RES[__j][__i] = (X[__i] == Y[__j]); \ + } + +#define PCMPSTR_RNG(X, Y, RES) \ + { \ + int __size = (sizeof (*X) ^ 3) * 8; \ + int __i, __j; \ + for (__j = 0; __j < __size; __j++) \ + for (__i = 0; __i < __size - 1; __i += 2) \ + { \ + RES[__j][__i] = (Y[__j] >= X[__i]); \ + RES[__j][__i+1] = (Y[__j] <= X[__i + 1]); \ + } \ + } + +static void +override_invalid (unsigned char res[16][16], int la, int lb, + const int mode, int dim) +{ + int i, j; + + for (j = 0; j < dim; j++) + for (i = 0; i < dim; i++) + if (i < la && j >= lb) + res[j][i] = 0; + else if (i >= la) + switch ((mode & 0x0C)) + { + case 
SIDD_CMP_EQUAL_ANY:
+    case SIDD_CMP_RANGES:
+      res[j][i] = 0;
+      break;
+    case SIDD_CMP_EQUAL_EACH:
+      res[j][i] = (j >= lb) ? 1: 0;
+      break;
+    case SIDD_CMP_EQUAL_ORDERED:
+      res[j][i] = 1;
+      break;
+    }
+}
+/* Populate RES for operands A and B under MODE.  The low two bits of
+   MODE select which view of the operands is compared (unsigned or
+   signed, bytes or words).  PCMPSTR_RNG is invoked when the
+   aggregation field (MODE & 0x0C) is SIDD_CMP_RANGES and PCMPSTR_EQ
+   otherwise.  RES is then handed to override_invalid together with
+   the lengths LA and LB and the element count (16 for byte modes, 8
+   for word modes).
+   NOTE(review): PCMPSTR_RNG and PCMPSTR_EQ are defined earlier in this
+   header; presumably they store the per-element comparison results in
+   RES -- confirm against their definitions.  */
+static void
+calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
+             unsigned char res[16][16])
+{
+  union
+    {
+      __m128i x;
+      signed char sc[16];
+      unsigned char uc[16];
+      signed short ss[8];
+      unsigned short us[8];
+    } d, s;
+
+  d.x = a;
+  s.x = b;
+
+  switch ((mode & 3))
+    {
+    case SIDD_UBYTE_OPS:
+      if ((mode & 0x0C) == SIDD_CMP_RANGES)
+        {
+          PCMPSTR_RNG (d.uc, s.uc, res);
+        }
+      else
+        {
+          PCMPSTR_EQ (d.uc, s.uc, res);
+        }
+      break;
+    case SIDD_UWORD_OPS:
+      if ((mode & 0x0C) == SIDD_CMP_RANGES)
+        {
+          PCMPSTR_RNG (d.us, s.us, res);
+        }
+      else
+        {
+          PCMPSTR_EQ (d.us, s.us, res);
+        }
+      break;
+    case SIDD_SBYTE_OPS:
+      if ((mode & 0x0C) == SIDD_CMP_RANGES)
+        {
+          PCMPSTR_RNG (d.sc, s.sc, res);
+        }
+      else
+        {
+          PCMPSTR_EQ (d.sc, s.sc, res);
+        }
+      break;
+    case SIDD_SWORD_OPS:
+      if ((mode & 0x0C) == SIDD_CMP_RANGES)
+        {
+          PCMPSTR_RNG (d.ss, s.ss, res);
+        }
+      else
+        {
+          PCMPSTR_EQ (d.ss, s.ss, res);
+        }
+      break;
+    }
+
+  override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
+}
+/* Return the result bit mask for comparing A (length LA) with B
+   (length LB) under MODE.  Negative lengths are replaced by their
+   absolute value and both lengths are clamped to the element count
+   (16 for byte modes, 8 for word modes).  The matrix filled in by
+   calc_matrix is reduced according to the aggregation field
+   (MODE & 0x0C), the polarity field (MODE & 0x30) is applied, and
+   only the low 16 (or 8) bits are returned.  */
+static int
+calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
+{
+  unsigned char mtx[16][16];
+  int i, j, k, dim, res = 0;
+
+  memset (mtx, 0, sizeof (mtx));
+
+  dim = (mode & 1) == 0 ? 16 : 8;
+
+  if (la < 0)
+    la = -la;
+
+  if (lb < 0)
+    lb = -lb;
+
+  if (la > dim)
+    la = dim;
+
+  if (lb > dim)
+    lb = dim;
+
+  calc_matrix (a, la, b, lb, mode, mtx);
+
+  switch ((mode & 0x0C))
+    {
+    case SIDD_CMP_EQUAL_ANY:  /* Bit I: any nonzero entry in row I.  */
+      for (i = 0; i < dim; i++)
+        for (j = 0; j < dim; j++)
+          if (mtx[i][j])
+            res |= (1 << i);
+      break;
+      /* Bit J: both entries of some column pair (I, I + 1) in row J
+         are nonzero.  */
+    case SIDD_CMP_RANGES:
+      for (i = 0; i < dim; i += 2)
+        for(j = 0; j < dim; j++)
+          if (mtx[j][i] && mtx[j][i+1])
+            res |= (1 << j);
+      break;
+      /* Bit I: the diagonal entry mtx[I][I] is nonzero.  */
+    case SIDD_CMP_EQUAL_EACH:
+      for(i = 0; i < dim; i++)
+        if (mtx[i][i])
+          res |= (1 << i);
+      break;
+      /* Bit I: every entry mtx[I + J][J], for J < DIM - I, has its low
+         bit set.  */
+    case SIDD_CMP_EQUAL_ORDERED:
+      for(i = 0; i < dim; i++)
+        {
+          unsigned char val = 1;
+
+          for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
+            val &= mtx[k][j];
+
+          if (val)
+            res |= (1 << i);
+          else
+            res &= ~(1 << i);
+        }
+      break;
+    }
+
+  switch ((mode & 0x30))
+    {
+    case SIDD_POSITIVE_POLARITY:
+    case SIDD_MASKED_POSITIVE_POLARITY:
+      break;
+
+    case SIDD_NEGATIVE_POLARITY:
+      res ^= -1;  /* Flip every bit; excess bits are masked off below.  */
+      break;
+      /* Flip only the first LB bits (LB was clamped to DIM above).  */
+    case SIDD_MASKED_NEGATIVE_POLARITY:
+      for (i = 0; i < lb; i++)
+        if (res & (1 << i))
+          res &= ~(1 << i);
+        else
+          res |= (1 << i);
+      break;
+    }
+
+  return res & ((dim == 8) ? 0xFF : 0xFFFF);
+}
+/* Return the expected flag bits (any of CFLAG, ZFLAG, SFLAG, OFLAG)
+   for operands A and B with lengths LA and LB, given the result mask
+   RES2.  When IS_IMPLICIT is nonzero, ZFLAG and SFLAG are derived from
+   zero elements in B and A respectively and LA/LB are not used;
+   otherwise they are derived from the absolute values of LB and LA.
+   Bit 0 of MODE selects word (set) or byte (clear) elements.  */
+static int
+cmp_flags (__m128i a, int la, __m128i b, int lb,
+           int mode, int res2, int is_implicit)
+{
+  int i;
+  int flags = 0;
+  int is_bytes_mode = (mode & 1) == 0;
+  union
+    {
+      __m128i x;
+      unsigned char uc[16];
+      unsigned short us[8];
+    } d, s;
+
+  d.x = a;
+  s.x = b;
+
+  /* CF: reset if (RES2 == 0), set otherwise. */
+  if (res2 != 0)
+    flags |= CFLAG;
+
+  if (is_implicit)
+    {
+      /* ZF: set if any byte/word of src xmm operand is null, reset
+         otherwise.
+         SF: set if any byte/word of dst xmm operand is null, reset
+         otherwise. */
+
+      if (is_bytes_mode)
+        {
+          for (i = 0; i < 16; i++)
+            {
+              if (s.uc[i] == 0)
+                flags |= ZFLAG;
+              if (d.uc[i] == 0)
+                flags |= SFLAG;
+            }
+        }
+      else
+        {
+          for (i = 0; i < 8; i++)
+            {
+              if (s.us[i] == 0)
+                flags |= ZFLAG;
+              if (d.us[i] == 0)
+                flags |= SFLAG;
+            }
+        }
+    }
+  else
+    {
+      /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
+         SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */
+      int max_ind = is_bytes_mode ? 16 : 8;
+
+      if (la < 0)
+        la = -la;
+      if (lb < 0)
+        lb = -lb;
+
+      if (lb < max_ind)
+        flags |= ZFLAG;
+      if (la < max_ind)
+        flags |= SFLAG;
+    }
+
+  /* OF: equal to RES2[0]. */
+  if ((res2 & 0x1))
+    flags |= OFLAG;
+
+  /* AF: Reset.
+     PF: Reset. */
+  return flags;
+}
+/* Compute the result mask for A (length LA) and B (length LB) under
+   MODE with calc_res, store it in *RES2 and return the index of a set
+   bit in it: the highest one when bit 0x40 of MODE is set, the lowest
+   one otherwise.  If no bit is set, the element count (16 for byte
+   modes, 8 for word modes) is returned.  */
+static int
+cmp_indexed (__m128i a, int la, __m128i b, int lb,
+             const int mode, int *res2)
+{
+  int i, ndx;
+  int dim = (mode & 1) == 0 ? 16 : 8;
+  int r2;
+
+  r2 = calc_res (a, la, b, lb, mode);
+
+  ndx = dim;  /* Value returned when the mask is empty.  */
+  if ((mode & 0x40))
+    {
+      for (i = dim - 1; i >= 0; i--)
+        if (r2 & (1 << i))
+          {
+            ndx = i;
+            break;
+          }
+    }
+  else
+    {
+      for (i = 0; i < dim; i++)
+        if ((r2 & (1 << i)))
+          {
+            ndx = i;
+            break;
+          }
+    }
+
+  *res2 = r2;
+  return ndx;
+}
+/* Compute the result mask for A (length LA) and B (length LB) under
+   MODE with calc_res, store it in *RES2 and return it in vector form.
+   When bit 0x40 of MODE is set, each element of the returned vector
+   is -1 or 0 according to its result bit.  Otherwise the first short
+   (byte modes) or first char (word modes) of the result's storage is
+   copied to the start of an otherwise zero vector; on little-endian
+   x86 these are the low-order 16 or 8 bits.  */
+static __m128i
+cmp_masked (__m128i a, int la, __m128i b, int lb,
+            const int mode, int *res2)
+{
+  union
+    {
+      __m128i x;
+      char c[16];
+      short s[8];
+    } ret;
+  int i;
+  int dim = (mode & 1) == 0 ? 16 : 8;
+  union
+    {
+      int i;
+      char c[4];
+      short s[2];
+    } r2;
+
+  r2.i = calc_res (a, la, b, lb, mode);
+
+  memset (&ret, 0, sizeof (ret));
+
+  if (mode & 0x40)
+    {
+      for (i = 0; i < dim; i++)
+        if (dim == 8)
+          ret.s [i] = (r2.i & (1 << i)) ? -1 : 0;
+        else
+          ret.c [i] = (r2.i & (1 << i)) ? -1 : 0;
+    }
+  else
+    {
+      if (dim == 16)
+        ret.s[0] = r2.s[0];
+      else
+        ret.c[0] = r2.c[0];
+    }
+
+  *res2 = r2.i;
+
+  return ret.x;
+}
+/* Return the index of the first zero element of A, or the element
+   count (16 for byte modes, 8 for word modes) if there is none.  Bit
+   0 of MODE selects word (set) or byte (clear) elements.  */
+static int
+calc_str_len (__m128i a, const int mode)
+{
+  union
+    {
+      __m128i x;
+      char c[16];
+      short s[8];
+    } s;
+  int i;
+  int dim = (mode & 1) == 0 ? 16 : 8;
+
+  s.x = a;
+
+  if ((mode & 1))
+    {
+      for (i = 0; i < dim; i++)
+        if (s.s[i] == 0)
+          break;
+    }
+  else
+    {
+      for (i = 0; i < dim; i++)
+        if (s.c[i] == 0)
+          break;
+    }
+
+  return i;
+}
+/* Explicit-length, index-returning comparison.  Return cmp_indexed's
+   index for *A (length LA) and *B (length LB) under MODE.  If FLAGS is
+   not NULL, store there the flags cmp_flags computes with its
+   explicit-length rules.  */
+static inline int
+cmp_ei (__m128i *a, int la, __m128i *b, int lb,
+        const int mode, int *flags)
+{
+  int res2;
+  int index = cmp_indexed (*a, la, *b, lb, mode, &res2);
+
+  if (flags != NULL)
+    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
+
+  return index;
+}
+/* Implicit-length, index-returning comparison.  The lengths of *A and
+   *B are taken from calc_str_len; the return value is cmp_indexed's
+   index under MODE.  If FLAGS is not NULL, store there the flags
+   cmp_flags computes with its implicit-length rules.  */
+static inline int
+cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
+{
+  int la, lb;
+  int res2;
+  int index;
+
+  la = calc_str_len (*a, mode);
+  lb = calc_str_len (*b, mode);
+
+  index = cmp_indexed (*a, la, *b, lb, mode, &res2);
+
+  if (flags != NULL)
+    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
+
+  return index;
+}
+/* Explicit-length, mask-returning comparison.  Return cmp_masked's
+   vector for *A (length LA) and *B (length LB) under MODE.  If FLAGS
+   is not NULL, store there the flags cmp_flags computes with its
+   explicit-length rules.  */
+static inline __m128i
+cmp_em (__m128i *a, int la, __m128i *b, int lb,
+        const int mode, int *flags )
+{
+  int res2;
+  __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
+
+  if (flags != NULL)
+    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
+
+  return mask;
+}
+/* Implicit-length, mask-returning comparison.  The lengths of *A and
+   *B are taken from calc_str_len; the return value is cmp_masked's
+   vector under MODE.  If FLAGS is not NULL, store there the flags
+   cmp_flags computes with its implicit-length rules.  */
+static inline __m128i
+cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
+{
+  int la, lb;
+  int res2;
+  __m128i mask;
+
+  la = calc_str_len (*a, mode);
+  lb = calc_str_len (*b, mode);
+
+  mask = cmp_masked (*a, la, *b, lb, mode, &res2);
+  if (flags != NULL)
+    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
+
+  return mask;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h b/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h
new file mode 100644
index 0000000..ce06ba1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcnt.h
@@ -0,0 +1,41 @@
+#include "sse4_2-check.h"
+
+#include <nmmintrin.h>
+
+#define NUM 1024
+/* Count the set bits of V one bit at a time.  TYPE is not defined here.  */
+static int
+compute_popcnt (TYPE v)
+{
+  int ret;
+  int i;
+
+  ret = 0;
+  for (i = 0; i < sizeof(v) * 8; i++)
+    if ((v & ((TYPE)1 << (TYPE) i)))
+      ret++;
+
+  return ret;
+}
+/* Compare POPCNT with compute_popcnt on NUM rand () values; abort if unequal.  */
+static void
+sse4_2_test (void)
+{
+  int i;
+  TYPE vals[NUM];
+  TYPE res;
+
+  for (i = 0; i < NUM; i++)
+    {
+      vals[i] = rand ();
+      if (sizeof (TYPE) > 4)  /* Wider TYPE: OR a second value into the upper half.  */
+        vals[i] |= (TYPE)rand() << (TYPE)(sizeof (TYPE) * 4);
+    }
+
+  for (i=0; i < NUM; i++)
+    {
+      res = POPCNT (vals[i]);
+      if (res != compute_popcnt (vals[i]))
+        abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c b/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c
new file mode 100644
index 0000000..69a32b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcntl.c
@@ -0,0 +1,8 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+/* Run the shared popcnt test with unsigned int and _mm_popcnt_u32.  */
+#define TYPE unsigned int
+#define POPCNT _mm_popcnt_u32
+
+#include "sse4_2-popcnt.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c b/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c
new file mode 100644
index 0000000..7847393
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_2-popcntq.c
@@ -0,0 +1,8 @@
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.2" } */
+/* Run the shared popcnt test with unsigned long long and _mm_popcnt_u64.  */
+#define TYPE unsigned long long
+#define POPCNT _mm_popcnt_u64
+
+#include "sse4_2-popcnt.h" |