diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2007-05-20 01:27:48 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2007-05-20 01:27:48 +0200 |
commit | 376a4c0534af595af1f2ac7f862967021d58e4af (patch) | |
tree | a29219692e016070f64d880d86e5979aab82e17c | |
parent | dbca09c29d08d561ed9f08f1a1f6cfec86700ef4 (diff) | |
download | gcc-376a4c0534af595af1f2ac7f862967021d58e4af.zip gcc-376a4c0534af595af1f2ac7f862967021d58e4af.tar.gz gcc-376a4c0534af595af1f2ac7f862967021d58e4af.tar.bz2 |
re PR target/31585 (gcc.target/i386/sse-vect-types.c FAILs (also sse-13.c and sse-14.c))
PR target/31585
* config/i386/pmmintrin.h: Do not include xmmintrin.h.
* config/i386/xmmintrin.h (_mm_extract_pi16): Implement as always
inlined function, not as a macro.
(_mm_prefetch): Ditto.
(_m_pextrw): Ditto.
(_mm_insert_pi16): Ditto.
(_m_pinsrw): Ditto.
(_mm_shuffle_pi16): Ditto. Add const to __N argument.
(_m_pshufw): Ditto. Add const to __N argument.
(_mm_shuffle_ps): Ditto. Add const to __mask argument.
* config/i386/emmintrin.h (_mm_slli_epi16): Add const to __B argument.
(_mm_slli_epi32): Ditto.
(_mm_srli_si128): Implement as always inlined function, not as a macro.
Add __inline to function declaration.
(_mm_slli_si128): Ditto.
testsuite/ChangeLog:
PR target/31585
* gcc.target/i386/sse-13.c: Use -mssse3 and -msse4a compile options.
(__builtin_ia32_psllwi128): Redefine to test with immediate operand.
(__builtin_ia32_psrlqi128): Ditto.
(__builtin_ia32_psrlwi128): Ditto.
(__builtin_ia32_psrldi128): Ditto.
(__builtin_ia32_psrldqi128): Ditto.
(__builtin_ia32_pslldqi128): Ditto.
(__builtin_ia32_psrawi128): Ditto.
(__builtin_ia32_psradi128): Ditto.
(__builtin_ia32_psllqi128): Ditto.
(__builtin_ia32_pslldi128): Ditto.
(__builtin_prefetch): Ditto.
(__builtin_ia32_pshufw): Ditto.
(__builtin_ia32_vec_set_v4hi): Ditto.
(__builtin_ia32_vec_ext_v4hi): Ditto.
(__builtin_ia32_shufps): Ditto.
* gcc.target/i386/sse-14.c: Same changes as sse-13.c.
From-SVN: r124861
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/i386/emmintrin.h | 17 | ||||
-rw-r--r-- | gcc/config/i386/pmmintrin.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 34 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-13.c | 27 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-14.c | 28 |
7 files changed, 99 insertions, 48 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dc0ed07..01a9a63 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2006-05-20 Uros Bizjak <ubizjak@gmail.com> + + PR target/31585 + * config/i386/pmmintrin.h: Do not include xmmintrin.h + * config/i386/xmmintrin.h (_mm_extract_pi16): Implement as always + inlined function, not as a macro. + (_mm_prefetch): Ditto. + (_m_pextrw): Ditto. + (_mm_insert_pi16): Ditto. + (_m_pinsrw): Ditto. + (_mm_shuffle_pi16): Ditto. Add const to __N argument. + (_m_pshufw): Ditto. Add const to __N argument. + (_mm_shufle_ps): Ditto. Add const to __mask argument. + * config/i386/emmintrin.h (_mm_slli_epi16): Add const to __B argument. + (_mm_slli_epi32): Ditto. + (_mm_srli_si128): Implement as always inlined function, not as a macro. + Add __inline to function declaration. + (_mm_slli_si128): Ditto. + 2007-05-19 Uros Bizjak <ubizjak@gmail.com> * config/i386/sfp-machine.h (FP_EX_INVALID, FP_EX_DENORM, diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index a230a70..e8ef024 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1105,13 +1105,13 @@ _mm_mul_epu32 (__m128i __A, __m128i __B) } static __inline __m128i __attribute__((__always_inline__)) -_mm_slli_epi16 (__m128i __A, int __B) +_mm_slli_epi16 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); } static __inline __m128i __attribute__((__always_inline__)) -_mm_slli_epi32 (__m128i __A, int __B) +_mm_slli_epi32 (__m128i __A, const int __B) { return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); } @@ -1134,24 +1134,17 @@ _mm_srai_epi32 (__m128i __A, const int __B) return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } -#if 0 -static __m128i __attribute__((__always_inline__)) +static __inline __m128i __attribute__((__always_inline__)) _mm_srli_si128 (__m128i __A, const int __B) { return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8)); } -static __m128i 
__attribute__((__always_inline__)) -_mm_srli_si128 (__m128i __A, const int __B) +static __inline __m128i __attribute__((__always_inline__)) +_mm_slli_si128 (__m128i __A, const int __B) { return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8)); } -#else -#define _mm_srli_si128(__A, __B) \ - ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8)) -#define _mm_slli_si128(__A, __B) \ - ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8)) -#endif static __inline __m128i __attribute__((__always_inline__)) _mm_srli_epi16 (__m128i __A, const int __B) diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h index 39d7c17..09df810 100644 --- a/gcc/config/i386/pmmintrin.h +++ b/gcc/config/i386/pmmintrin.h @@ -35,7 +35,6 @@ #else /* We need definitions from the SSE2 and SSE header files*/ -#include <xmmintrin.h> #include <emmintrin.h> /* Additional bits in the MXCSR. */ diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 57ef330..ac3a59a 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -716,17 +716,11 @@ _mm_cvtps_pi8(__m128 __A) } /* Selects four specific SPFP values from A and B based on MASK. */ -#if 0 static __inline __m128 __attribute__((__always_inline__)) -_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask) +_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) { return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); } -#else -#define _mm_shuffle_ps(A, B, MASK) \ - ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK))) -#endif - /* Selects and interleaves the upper two SPFP values from A and B. */ static __inline __m128 __attribute__((__always_inline__)) @@ -992,7 +986,6 @@ _mm_move_ss (__m128 __A, __m128 __B) } /* Extracts one of the four words of A. The selector N must be immediate. 
*/ -#if 0 static __inline int __attribute__((__always_inline__)) _mm_extract_pi16 (__m64 const __A, int const __N) { @@ -1004,14 +997,9 @@ _m_pextrw (__m64 const __A, int const __N) { return _mm_extract_pi16 (__A, __N); } -#else -#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N)) -#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) -#endif /* Inserts word D into one of four words of A. The selector N must be immediate. */ -#if 0 static __inline __m64 __attribute__((__always_inline__)) _mm_insert_pi16 (__m64 const __A, int const __D, int const __N) { @@ -1023,11 +1011,6 @@ _m_pinsrw (__m64 const __A, int const __D, int const __N) { return _mm_insert_pi16 (__A, __D, __N); } -#else -#define _mm_insert_pi16(A, D, N) \ - ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N))) -#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) -#endif /* Compute the element-wise maximum of signed 16-bit values. */ static __inline __m64 __attribute__((__always_inline__)) @@ -1110,23 +1093,17 @@ _m_pmulhuw (__m64 __A, __m64 __B) /* Return a combination of the four 16-bit values in A. The selector must be an immediate. */ -#if 0 static __inline __m64 __attribute__((__always_inline__)) -_mm_shuffle_pi16 (__m64 __A, int __N) +_mm_shuffle_pi16 (__m64 __A, int const __N) { return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); } static __inline __m64 __attribute__((__always_inline__)) -_m_pshufw (__m64 __A, int __N) +_m_pshufw (__m64 __A, int const __N) { return _mm_shuffle_pi16 (__A, __N); } -#else -#define _mm_shuffle_pi16(A, N) \ - ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) -#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) -#endif /* Conditionally store byte elements of A into P. The high bit of each byte in the selector N determines whether the corresponding byte from @@ -1186,16 +1163,11 @@ _m_psadbw (__m64 __A, __m64 __B) /* Loads one cache line from address P to a location "closer" to the processor. 
The selector I specifies the type of prefetch operation. */ -#if 0 static __inline void __attribute__((__always_inline__)) _mm_prefetch (void *__P, enum _mm_hint __I) { __builtin_prefetch (__P, 0, __I); } -#else -#define _mm_prefetch(P, I) \ - __builtin_prefetch ((P), 0, (I)) -#endif /* Stores the data in A to the address P without polluting the caches. */ static __inline void __attribute__((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 787e256..42f0c69 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2006-05-20 Uros Bizjak <ubizjak@gmail.com> + + PR target/31585 + * gcc.target/i386/sse-13.c: Use "-mssse3 -msse4a" compile options. + (__builtin_ia32_psllwi128): Redefine to test with immediate operand. + (__builtin_ia32_psrlqi128): Ditto. + (__builtin_ia32_psrlwi128): Ditto. + (__builtin_ia32_psrldi128): Ditto. + (__builtin_ia32_psrldqi128): Ditto. + (__builtin_ia32_pslldqi128): Ditto. + (__builtin_ia32_psrawi128): Ditto. + (__builtin_ia32_psradi128): Ditto. + (__builtin_ia32_psllqi128): Ditto. + (__builtin_ia32_pslldi128): Ditto. + (__builtin_prefetch): Ditto. + (__builtin_ia32_pshufw): Ditto. + (__builtin_ia32_vec_set_v4hi): Ditto. + (__builtin_ia32_vec_ext_v4hi): Ditto. + (__builtin_ia32_shufps): Ditto. + * gcc.target/i386/sse-14.c: Same changes as sse-13.c. + 2007-05-19 Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> PR fortran/31974 diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 1f657e1..6bba000 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O2 -msse" } */ +/* { dg-options "-O2 -mssse3 -msse4a" } */ /* Test that the intrinsics compile with optimization. 
All of them are defined as inline functions in mmintrin.h that reference the proper @@ -9,4 +9,27 @@ #define static #define __inline -#include <xmmintrin.h> +/* Following intrinsics require immediate arguments. */ + +/* emmintrin.h */ +#define __builtin_ia32_psllwi128(A, B) __builtin_ia32_psllwi128(A, 1) +#define __builtin_ia32_psrlqi128(A, B) __builtin_ia32_psrlqi128(A, 1) +#define __builtin_ia32_psrlwi128(A, B) __builtin_ia32_psrlwi128(A, 1) +#define __builtin_ia32_psrldi128(A, B) __builtin_ia32_psrldi128(A, 1) +#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) +#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) +#define __builtin_ia32_psrawi128(A, B) __builtin_ia32_psrawi128(A, 1) +#define __builtin_ia32_psradi128(A, B) __builtin_ia32_psradi128(A, 1) +#define __builtin_ia32_psllqi128(A, B) __builtin_ia32_psllqi128(A, 1) +#define __builtin_ia32_pslldi128(A, B) __builtin_ia32_pslldi128(A, 1) + +/* xmmintrin.h */ +#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) +#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) +#define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) +#define __builtin_ia32_shufps(A, B, C) __builtin_ia32_shufps(A, B, 0) + +#include <ammintrin.h> +#include <tmmintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 38e4e56..d999380 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O0 -msse" } */ +/* { dg-options "-O0 -mssse3 -msse4a" } */ /* Test that the intrinsics compile without optimization. 
All of them are defined as inline functions in mmintrin.h that reference the proper @@ -9,4 +9,28 @@ #define static #define __inline -#include <xmmintrin.h> +/* Following intrinsics require immediate arguments. */ + +/* emmintrin.h */ +#define __builtin_ia32_psllwi128(A, B) __builtin_ia32_psllwi128(A, 1) +#define __builtin_ia32_psrlqi128(A, B) __builtin_ia32_psrlqi128(A, 1) +#define __builtin_ia32_psrlwi128(A, B) __builtin_ia32_psrlwi128(A, 1) +#define __builtin_ia32_psrldi128(A, B) __builtin_ia32_psrldi128(A, 1) +#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) +#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) +#define __builtin_ia32_psrawi128(A, B) __builtin_ia32_psrawi128(A, 1) +#define __builtin_ia32_psradi128(A, B) __builtin_ia32_psradi128(A, 1) +#define __builtin_ia32_psllqi128(A, B) __builtin_ia32_psllqi128(A, 1) +#define __builtin_ia32_pslldi128(A, B) __builtin_ia32_pslldi128(A, 1) + +/* xmmintrin.h */ +#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) +#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) +#define __builtin_ia32_vec_set_v4hi(A, D, N) \ + __builtin_ia32_vec_set_v4hi(A, D, 0) +#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) +#define __builtin_ia32_shufps(A, B, C) __builtin_ia32_shufps(A, B, 0) + +#include <ammintrin.h> +#include <tmmintrin.h> + |