diff options
author | Paul A. Clarke <pc@us.ibm.com> | 2021-07-22 12:54:33 -0700 |
---|---|---|
committer | Paul A. Clarke <pc@us.ibm.com> | 2021-10-11 20:26:15 -0500 |
commit | 285d75a45469a438f07a93a92c700d7103c082de (patch) | |
tree | b8041227b021453adfccf47acc0b261282811190 /gcc | |
parent | 1ec08caf7e009053d21b331bd98679fec4ff2900 (diff) | |
download | gcc-285d75a45469a438f07a93a92c700d7103c082de.zip gcc-285d75a45469a438f07a93a92c700d7103c082de.tar.gz gcc-285d75a45469a438f07a93a92c700d7103c082de.tar.bz2 |
rs6000: Support SSE4.1 "cvt" intrinsics
Function signatures and decorations match gcc/config/i386/smmintrin.h.
Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64
from gcc/testsuite/gcc.target/i386.
sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
modified to use "signed char" instead of plain "char", because
plain "char" is unsigned by default on powerpc.
2021-10-11 Paul A. Clarke <pc@us.ibm.com>
gcc
* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
_mm_cvtepu32_epi64): New.
gcc/testsuite
* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
adjust dg directives to suit.
* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/rs6000/smmintrin.h | 138 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c | 43 |
13 files changed, 648 insertions, 0 deletions
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index f935ab0..ad6b68e 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -330,6 +330,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); } +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi16 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v16qi) __A); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi32 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi) __A); + return (__m128i) vec_unpackh ((__v8hi) __A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi64 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi) __A); + __A = (__m128i) vec_unpackh ((__v8hi) __A); + return (__m128i) vec_unpackh ((__v4si) __A); +} +#endif + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_epi32 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v8hi) __A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_epi64 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v8hi) __A); + return (__m128i) vec_unpackh ((__v4si) __A); +} +#endif + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi32_epi64 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v4si) __A); +} +#endif + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu8_epi16 (__m128i __A) +{ + const __v16qu __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero); +#else /* __BIG_ENDIAN__. 
*/ + __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu8_epi32 (__m128i __A) +{ + const __v16qu __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero); + __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero); +#else /* __BIG_ENDIAN__. */ + __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A); + __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu8_epi64 (__m128i __A) +{ + const __v16qu __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero); + __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero); + __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero); +#else /* __BIG_ENDIAN__. */ + __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A); + __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A); + __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu16_epi32 (__m128i __A) +{ + const __v8hu __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero); +#else /* __BIG_ENDIAN__. */ + __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu16_epi64 (__m128i __A) +{ + const __v8hu __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero); + __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero); +#else /* __BIG_ENDIAN__. 
*/ + __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A); + __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu32_epi64 (__m128i __A) +{ + const __v4su __zero = {0}; +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_mergeh ((__v4su) __A, __zero); +#else /* __BIG_ENDIAN__. */ + __A = (__m128i) vec_mergeh (__zero, (__v4su) __A); +#endif /* __BIG_ENDIAN__. */ + return __A; +} + /* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. */ __inline __m128i diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c new file mode 100644 index 0000000..99cca61 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + signed char c[NUM * 4]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 4) + (i / 4) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c new file mode 100644 index 0000000..9ec1ab7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector" } */ + +#ifndef CHECK_H 
+#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + signed char c[NUM * 8]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 2) + (i / 2) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi8_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c new file mode 100644 index 0000000..805897d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 8]; + short s[NUM]; + signed char c[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 8) + (i / 8) * 16] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 8) + dst.x [i / 8] = _mm_cvtepi8_epi16 (src.x [i / 8]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c new file mode 100644 index 0000000..1c26378 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST 
sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + int i[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.i[(i % 2) + (i / 2) * 4] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi32_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c new file mode 100644 index 0000000..43f30f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + short s[NUM * 2]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 4) + (i / 4) * 8] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepi16_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c new file mode 100644 index 0000000..6786469 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + 
+static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + short s[NUM * 4]; + } dst, src; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 2) + (i / 2) * 8] = i * i * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepi16_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c new file mode 100644 index 0000000..643a2a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + unsigned char c[NUM * 4]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 4) + (i / 4) * 16] = i * i; + if ((i % 4)) + src.c[(i % 4) + (i / 4) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepu8_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c new file mode 100644 index 0000000..871f425 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM 
/ 2]; + unsigned long long ll[NUM]; + unsigned char c[NUM * 8]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 2) + (i / 2) * 16] = i * i; + if ((i % 2)) + src.c[(i % 2) + (i / 2) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepu8_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 2) + (i / 2) * 16] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c new file mode 100644 index 0000000..ee89ebc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 8]; + unsigned short s[NUM]; + unsigned char c[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.c[(i % 8) + (i / 8) * 16] = i * i; + if ((i % 4)) + src.c[(i % 8) + (i / 8) * 16] |= 0x80; + } + + for (i = 0; i < NUM; i += 8) + dst.x [i / 8] = _mm_cvtepu8_epi16 (src.x [i / 8]); + + for (i = 0; i < NUM; i++) + if (src.c[(i % 8) + (i / 8) * 16] != dst.s[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c new file mode 100644 index 0000000..3ec28ab --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + 
unsigned long long ll[NUM]; + unsigned int i[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.i[(i % 2) + (i / 2) * 4] = i * i; + if ((i % 2)) + src.i[(i % 2) + (i / 2) * 4] |= 0x80000000; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepu32_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.i[(i % 2) + (i / 2) * 4] != dst.ll[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c new file mode 100644 index 0000000..decd9ff --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + unsigned int i[NUM]; + unsigned short s[NUM * 2]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 4) + (i / 4) * 8] = i * i; + if ((i % 4)) + src.s[(i % 4) + (i / 4) * 8] |= 0x8000; + } + + for (i = 0; i < NUM; i += 4) + dst.x [i / 4] = _mm_cvtepu16_epi32 (src.x [i / 4]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 4) + (i / 4) * 8] != dst.i[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c new file mode 100644 index 0000000..0383044 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 128 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + unsigned 
long long ll[NUM]; + unsigned short s[NUM * 4]; + } dst, src; + int i; + + for (i = 0; i < NUM; i++) + { + src.s[(i % 2) + (i / 2) * 8] = i * i; + if ((i % 2)) + src.s[(i % 2) + (i / 2) * 8] |= 0x8000; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cvtepu16_epi64 (src.x [i / 2]); + + for (i = 0; i < NUM; i++) + if (src.s[(i % 2) + (i / 2) * 8] != dst.ll[i]) + abort (); +} |