diff options
author | Marc Glisse <marc.glisse@inria.fr> | 2016-08-31 13:56:37 +0200 |
---|---|---|
committer | Marc Glisse <glisse@gcc.gnu.org> | 2016-08-31 11:56:37 +0000 |
commit | c6b0037d0e25340cd40d8fe01cbd2abe75c1f951 (patch) | |
tree | 26495e00ae9000737c30e9b6030906ee51a2cb6e | |
parent | c1105fa7e43a9169187012010eeee81893e6a3cc (diff) | |
download | gcc-c6b0037d0e25340cd40d8fe01cbd2abe75c1f951.zip gcc-c6b0037d0e25340cd40d8fe01cbd2abe75c1f951.tar.gz gcc-c6b0037d0e25340cd40d8fe01cbd2abe75c1f951.tar.bz2 |
[x86] Don't use builtins for unaligned load/store
2016-08-31 Marc Glisse <marc.glisse@inria.fr>
gcc/
* config/i386/avx512fintrin.h (__m512_u, __m512i_u, __m512d_u):
New types.
(_mm512_loadu_pd, _mm512_storeu_pd, _mm512_loadu_ps,
_mm512_storeu_ps, _mm512_loadu_si512, _mm512_storeu_si512):
Replace builtin with vector extension.
* config/i386/avxintrin.h (__m256_u, __m256i_u, __m256d_u):
New types.
(_mm256_loadu_pd, _mm256_storeu_pd, _mm256_loadu_ps,
_mm256_storeu_ps, _mm256_loadu_si256, _mm256_storeu_si256):
Replace builtin with vector extension.
* config/i386/emmintrin.h (__m128i_u, __m128d_u): New types.
(_mm_loadu_pd, _mm_storeu_pd, _mm_loadu_si128, _mm_storeu_si128):
Replace builtin with vector extension.
* config/i386/xmmintrin.h (__m128_u): New type.
(_mm_loadu_ps, _mm_storeu_ps): Replace builtin with vector extension.
(_mm_load_ps, _mm_store_ps): Simplify.
gcc/testsuite/
* gcc.target/i386/pr59539-2.c: Adapt options.
* gcc.target/i386/avx512f-vmovdqu32-1.c: Relax expected asm.
From-SVN: r239889
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 29 | ||||
-rw-r--r-- | gcc/config/i386/avxintrin.h | 27 | ||||
-rw-r--r-- | gcc/config/i386/emmintrin.h | 16 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 11 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr59539-2.c | 2 |
8 files changed, 74 insertions, 39 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e9dd537..e9dfc91 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-08-31 Marc Glisse <marc.glisse@inria.fr> + + * config/i386/avx512fintrin.h (__m512_u, __m512i_u, __m512d_u): + New types. + (_mm512_loadu_pd, _mm512_storeu_pd, _mm512_loadu_ps, + _mm512_storeu_ps, _mm512_loadu_si512, _mm512_storeu_si512): + Replace builtin with vector extension. + * config/i386/avxintrin.h (__m256_u, __m256i_u, __m256d_u): + New types. + (_mm256_loadu_pd, _mm256_storeu_pd, _mm256_loadu_ps, + _mm256_storeu_ps, _mm256_loadu_si256, _mm256_storeu_si256): + Replace builtin with vector extension. + * config/i386/emmintrin.h (__m128i_u, __m128d_u): New types. + (_mm_loadu_pd, _mm_storeu_pd, _mm_loadu_si128, _mm_storeu_si128): + Replace builtin with vector extension. + * config/i386/xmmintrin.h (__m128_u): New type. + (_mm_loadu_ps, _mm_storeu_ps): Replace builtin with vector extension. + (_mm_load_ps, _mm_store_ps): Simplify. + 2016-08-31 Eric Botcazou <ebotcazou@adacore.com> * config/arm/arm.c (thumb1_size_rtx_costs) <SET>: Add missing guard. diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 688e8dc..2372c83 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -52,6 +52,11 @@ typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +/* Unaligned version of the same type. */ +typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); +typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); +typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); + typedef unsigned char __mmask8; typedef unsigned short __mmask16; @@ -5674,10 +5679,7 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_loadu_pd (void const *__P) { - return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1); + return *(__m512d_u *)__P; } extern __inline __m512d @@ -5703,8 +5705,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_pd (void *__P, __m512d __A) { - __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A, - (__mmask8) -1); + *(__m512d_u *)__P = __A; } extern __inline void @@ -5719,10 +5720,7 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_loadu_ps (void const *__P) { - return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1); + return *(__m512_u *)__P; } extern __inline __m512 @@ -5748,8 +5746,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_ps (void *__P, __m512 __A) { - __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A, - (__mmask16) -1); + *(__m512_u *)__P = __A; } extern __inline void @@ -5791,10 +5788,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_loadu_si512 (void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return *(__m512i_u *)__P; } extern __inline __m512i @@ -5820,8 +5814,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_storeu_si512 (void *__P, __m512i __A) { - __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, - (__mmask16) -1); + *(__m512i_u *)__P = __A; } extern __inline void diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 9519400..9cd9aab 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -58,6 +58,17 @@ typedef long long __m256i __attribute__ ((__vector_size__ (32), typedef double __m256d __attribute__ ((__vector_size__ (32), __may_alias__)); +/* Unaligned version of the same types. */ +typedef float __m256_u __attribute__ ((__vector_size__ (32), + __may_alias__, + __aligned__ (1))); +typedef long long __m256i_u __attribute__ ((__vector_size__ (32), + __may_alias__, + __aligned__ (1))); +typedef double __m256d_u __attribute__ ((__vector_size__ (32), + __may_alias__, + __aligned__ (1))); + /* Compare predicates for scalar and packed compare intrinsics. */ /* Equal (ordered, non-signaling) */ @@ -857,25 +868,25 @@ _mm256_store_ps (float *__P, __m256 __A) extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_loadu_pd (double const *__P) { - return (__m256d) __builtin_ia32_loadupd256 (__P); + return *(__m256d_u *)__P; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_storeu_pd (double *__P, __m256d __A) { - __builtin_ia32_storeupd256 (__P, (__v4df)__A); + *(__m256d_u *)__P = __A; } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_loadu_ps (float const *__P) { - return (__m256) __builtin_ia32_loadups256 (__P); + return *(__m256_u *)__P; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_storeu_ps (float *__P, __m256 __A) { - __builtin_ia32_storeups256 (__P, (__v8sf)__A); + *(__m256_u *)__P = __A; } extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -891,15 +902,15 @@ _mm256_store_si256 (__m256i *__P, __m256i __A) } extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_loadu_si256 (__m256i const *__P) +_mm256_loadu_si256 (__m256i_u const *__P) { - return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P); + return *__P; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_storeu_si256 (__m256i *__P, __m256i __A) +_mm256_storeu_si256 (__m256i_u *__P, __m256i __A) { - __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A); + *__P = __A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 8652fe9..b299cbc 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -52,6 +52,10 @@ typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16))); typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); +/* Unaligned version of the same types. */ +typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); + /* Create a selector for use with the SHUFPD instruction. */ #define _MM_SHUFFLE2(fp1,fp0) \ (((fp1) << 1) | (fp0)) @@ -123,7 +127,7 @@ _mm_load_pd (double const *__P) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_pd (double const *__P) { - return __builtin_ia32_loadupd (__P); + return *(__m128d_u *)__P; } /* Create a vector with all two elements equal to *P. */ @@ -165,7 +169,7 @@ _mm_store_pd (double *__P, __m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_pd (double *__P, __m128d __A) { - __builtin_ia32_storeupd (__P, __A); + *(__m128d_u *)__P = __A; } /* Stores the lower DPFP value. */ @@ -693,9 +697,9 @@ _mm_load_si128 (__m128i const *__P) } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_loadu_si128 (__m128i const *__P) +_mm_loadu_si128 (__m128i_u const *__P) { - return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); + return *__P; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -711,9 +715,9 @@ _mm_store_si128 (__m128i *__P, __m128i __B) } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_storeu_si128 (__m128i *__P, __m128i __B) +_mm_storeu_si128 (__m128i_u *__P, __m128i __B) { - __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); + *__P = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index ffe5771..26516e2 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -68,6 +68,9 @@ _mm_prefetch (const void *__P, enum _mm_hint __I) vector types, and their scalar components. */ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +/* Unaligned version of the same type. */ +typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); + /* Internal data types for implementing the intrinsics. */ typedef float __v4sf __attribute__ ((__vector_size__ (16))); @@ -921,14 +924,14 @@ _mm_load_ps1 (float const *__P) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_ps (float const *__P) { - return (__m128) *(__v4sf *)__P; + return *(__m128 *)__P; } /* Load four SPFP values from P. The address need not be 16-byte aligned. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_ps (float const *__P) { - return (__m128) __builtin_ia32_loadups (__P); + return *(__m128_u *)__P; } /* Load four SPFP values in reverse order. The address must be aligned. */ @@ -970,14 +973,14 @@ _mm_cvtss_f32 (__m128 __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_ps (float *__P, __m128 __A) { - *(__v4sf *)__P = (__v4sf)__A; + *(__m128 *)__P = __A; } /* Store four SPFP values. The address need not be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_ps (float *__P, __m128 __A) { - __builtin_ia32_storeups (__P, (__v4sf)__A); + *(__m128_u *)__P = __A; } /* Store the lower SPFP value across four words. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c8b85fa..61dc053 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-08-31 Marc Glisse <marc.glisse@inria.fr> + + * gcc.target/i386/pr59539-2.c: Adapt options. + * gcc.target/i386/avx512f-vmovdqu32-1.c: Relax expected asm. + 2016-08-31 Paul Thomas <pault@gcc.gnu.org> PR fortran/77418 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c index 744bfbc..a6f1c29 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c @@ -1,9 +1,9 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqu\[36\]\[24\]\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu(?:32|64)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu(?:32|64)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr59539-2.c b/gcc/testsuite/gcc.target/i386/pr59539-2.c index b53b8c4..eaa7057 100644 --- a/gcc/testsuite/gcc.target/i386/pr59539-2.c +++ b/gcc/testsuite/gcc.target/i386/pr59539-2.c @@ -1,6 +1,6 @@ /* PR target/59539 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx2" } */ +/* { dg-options "-O2 -march=haswell" } */ #include <immintrin.h> |