diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2020-01-28 11:32:56 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2020-01-28 11:33:12 -0800 |
commit | dd9b529f08c3c6064c37234922d298336d78caf7 (patch) | |
tree | 7851f3104602455f14e3013c32a8d1a08d450a85 | |
parent | 6c8e584430bc5dc01b4438f3c38a2a10fcba7685 (diff) | |
download | gcc-dd9b529f08c3c6064c37234922d298336d78caf7.zip gcc-dd9b529f08c3c6064c37234922d298336d78caf7.tar.gz gcc-dd9b529f08c3c6064c37234922d298336d78caf7.tar.bz2 |
i386: Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES
movaps/movups is one byte shorter than movdqa/movdqu. But it isn't the
case for AVX nor AVX512. This patch prefers TARGET_AVX over
TARGET_SSE_TYPELESS_STORES and adjust vmovups checks in assembly ouputs.
gcc/
PR target/91461
* config/i386/i386.md (*movoi_internal_avx): Remove
TARGET_SSE_TYPELESS_STORES check.
(*movti_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES.
(*movtf_internal): Likewise.
* config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES. Remove "<MODE_SIZE> == 16" check
from TARGET_SSE_TYPELESS_STORES.
gcc/testsuite/
PR target/91461
* gcc.target/i386/avx256-unaligned-store-2.c: Don't check
vmovups.
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
* gcc.target/i386/pieces-memcpy-4.c: Likewise.
* gcc.target/i386/pieces-memcpy-5.c: Likewise.
* gcc.target/i386/pieces-memcpy-6.c: Likewise.
* gcc.target/i386/pieces-strcpy-2.c: Likewise.
* gcc.target/i386/pr90980-1.c: Likewise.
* gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of
"vmovd" to avoid matching "vmovdqu".
* gcc.target/i386/pr87317-5.c: Likewise.
* gcc.target/i386/pr87317-7.c: Likewise.
* gcc.target/i386/pr91461-1.c: New test.
* gcc.target/i386/pr91461-2.c: Likewise.
* gcc.target/i386/pr91461-3.c: Likewise.
* gcc.target/i386/pr91461-4.c: Likewise.
* gcc.target/i386/pr91461-5.c: Likewise.
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 12 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 9 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr87317-4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr87317-5.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr87317-7.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr90980-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91461-1.c | 66 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91461-2.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91461-3.c | 76 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91461-4.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91461-5.c | 17 |
19 files changed, 253 insertions, 27 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5134748..05f3b72 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2020-01-28 H.J. Lu <hongjiu.lu@intel.com> + + PR target/91461 + * config/i386/i386.md (*movoi_internal_avx): Remove + TARGET_SSE_TYPELESS_STORES check. + (*movti_internal): Prefer TARGET_AVX over + TARGET_SSE_TYPELESS_STORES. + (*movtf_internal): Likewise. + * config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over + TARGET_SSE_TYPELESS_STORES. Remove "<MODE_SIZE> == 16" check + from TARGET_SSE_TYPELESS_STORES. + 2020-01-28 David Malcolm <dmalcolm@redhat.com> * diagnostic-core.h (warning_at): Rename overload to... diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a125ab3..9f0077d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1980,9 +1980,7 @@ (and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) (const_string "XI") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (and (eq_attr "alternative" "3") - (match_test "TARGET_SSE_TYPELESS_STORES"))) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V8SF") ] (const_string "OI")))]) @@ -2059,13 +2057,13 @@ (and (eq_attr "alternative" "3") (match_test "TARGET_AVX512VL")) (const_string "XI") + (match_test "TARGET_AVX") + (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (and (eq_attr "alternative" "5") (match_test "TARGET_SSE_TYPELESS_STORES")))) (const_string "V4SF") - (match_test "TARGET_AVX") - (const_string "TI") (match_test "optimize_function_for_size_p (cfun)") (const_string "V4SF") ] @@ -3324,13 +3322,13 @@ (set (attr "mode") (cond [(eq_attr "alternative" "3,4") (const_string "DI") + (match_test "TARGET_AVX") + (const_string "TI") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V4SF") (and (eq_attr "alternative" "2") (match_test "TARGET_SSE_TYPELESS_STORES")) (const_string "V4SF") - (match_test "TARGET_AVX") - (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 04a8c5e..abbd879 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1116,13 +1116,12 @@ (cond [(and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) (const_string "<sseinsnmode>") - (and (match_test "<MODE_SIZE> == 16") - (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") - (and (eq_attr "alternative" "3") - (match_test "TARGET_SSE_TYPELESS_STORES")))) - (const_string "<ssePSmode>") (match_test "TARGET_AVX") (const_string "<sseinsnmode>") + (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (and (eq_attr "alternative" "3") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "<ssePSmode>") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 693650d..37ab4b9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2020-01-28 H.J. Lu <hongjiu.lu@intel.com> + + PR target/91461 + * gcc.target/i386/avx256-unaligned-store-2.c: Don't check + vmovups. + * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. + * gcc.target/i386/pieces-memcpy-4.c: Likewise. + * gcc.target/i386/pieces-memcpy-5.c: Likewise. + * gcc.target/i386/pieces-memcpy-6.c: Likewise. + * gcc.target/i386/pieces-strcpy-2.c: Likewise. + * gcc.target/i386/pr90980-1.c: Likewise. + * gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of + "vmovd" to avoid matching "vmovdqu". + * gcc.target/i386/pr87317-5.c: Likewise. + * gcc.target/i386/pr87317-7.c: Likewise. + * gcc.target/i386/pr91461-1.c: New test. + * gcc.target/i386/pr91461-2.c: Likewise. + * gcc.target/i386/pr91461-3.c: Likewise. + * gcc.target/i386/pr91461-4.c: Likewise. + * gcc.target/i386/pr91461-5.c: Likewise. + 2020-01-28 David Malcolm <dmalcolm@redhat.com> * gcc.dg/plugin/diagnostic_plugin_test_metadata.c: Update for diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c index 1e7969b..be12529 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c @@ -23,6 +23,6 @@ avx_test (void) } } -/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */ -/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */ +/* { dg-final { scan-assembler-not "vmovdqu.*movv32qi_internal/3" } } */ +/* { dg-final { scan-assembler "vmovdqu.*movv16qi_internal/3" } } */ /* { dg-final { scan-assembler "vextract.128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c index a439a66..918028d 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c @@ -17,6 +17,6 @@ avx_test (void) d[i] = c[i] * 20.0; } -/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */ -/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */ +/* { dg-final { scan-assembler-not "vmovupd.*movv4df_internal/3" } } */ +/* { dg-final { scan-assembler "vmovupd.*movv2df_internal/3" } } */ /* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c index 64e8921..6f20203 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-4.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 18); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c index 3c464c3..5a1c7b3 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-5.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 19); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index cdb00e0..5f99cc9 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -9,5 +9,4 @@ foo (void) __builtin_memcpy (dst, src, 33); } -/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 7421255..90446ed 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -12,4 +12,4 @@ foo (char *s) } /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ -/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr87317-4.c b/gcc/testsuite/gcc.target/i386/pr87317-4.c index 2d4f24a..d802575 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-4.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-4.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr87317-5.c b/gcc/testsuite/gcc.target/i386/pr87317-5.c index 96f8284..42cf7dc 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-5.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-5.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr87317-7.c b/gcc/testsuite/gcc.target/i386/pr87317-7.c index 2c043d9..c76af7e 100644 --- a/gcc/testsuite/gcc.target/i386/pr87317-7.c +++ b/gcc/testsuite/gcc.target/i386/pr87317-7.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -march=haswell" } */ /* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */ -/* { dg-final { scan-assembler-not "vmovd" } } */ +/* { dg-final { scan-assembler-not "\tvmovd\t" } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr90980-1.c b/gcc/testsuite/gcc.target/i386/pr90980-1.c index 72a30dc..8855189 100644 --- a/gcc/testsuite/gcc.target/i386/pr90980-1.c +++ b/gcc/testsuite/gcc.target/i386/pr90980-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=skylake-avx512 -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[2346\]*\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr91461-1.c b/gcc/testsuite/gcc.target/i386/pr91461-1.c new file mode 100644 index 0000000..0c94b8e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-1.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovdqu\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler "\tvmovupd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m128i *p, __m128i x) +{ + *p = x; +} + +void +foo2 (__m128d *p, __m128d x) +{ + *p = x; +} + +void +foo3 (__float128 *p, __float128 x) +{ + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i x) +{ + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d x) +{ + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 x) +{ + *p = x; +} + +#ifdef __x86_64__ +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + *p = int128; +} + +void +foo8 (__int128_u *p) +{ + *p = int128; +} +#endif diff --git a/gcc/testsuite/gcc.target/i386/pr91461-2.c b/gcc/testsuite/gcc.target/i386/pr91461-2.c new file mode 100644 index 0000000..921cfaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler "\tvmovdqa\t" } } */ +/* { dg-final { scan-assembler "\tvmovapd\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m256i *p, __m256i x) +{ + *p = x; +} + +void +foo2 (__m256d *p, __m256d x) +{ + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-3.c b/gcc/testsuite/gcc.target/i386/pr91461-3.c new file mode 100644 index 0000000..c67a480 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-3.c @@ -0,0 +1,76 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ +/* { dg-final { scan-assembler-not "\tvmovups\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m128i *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m128d *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo3 (__float128 *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo4 (__m128i_u *p, __m128i a) +{ + register __m128i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo5 (__m128d_u *p, __m128d a) +{ + register __m128d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __float128 __float128_u __attribute__ ((__aligned__ (1))); + +void +foo6 (__float128_u *p, __float128 a) +{ + register __float128 x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +typedef __int128 __int128_u __attribute__ ((__aligned__ (1))); + +extern __int128 int128; + +void +foo7 (__int128 *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo8 (__int128_u *p) +{ + register __int128 x __asm ("xmm16") = int128; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-4.c b/gcc/testsuite/gcc.target/i386/pr91461-4.c new file mode 100644 index 0000000..69df590 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512f -mavx512vl" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m256i *p, __m256i a) +{ + register __m256i x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} + +void +foo2 (__m256d *p, __m256d a) +{ + register __m256d x __asm ("xmm16") = a; + asm volatile ("" : "+v" (x)); + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr91461-5.c b/gcc/testsuite/gcc.target/i386/pr91461-5.c new file mode 100644 index 0000000..9742630 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91461-5.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */ + +#include <immintrin.h> + +void +foo1 (__m512i *p, __m512i x) +{ + *p = x; +} + +void +foo2 (__m512d *p, __m512d x) +{ + *p = x; +} |