diff options
author | liuhongt <hongtao.liu@intel.com> | 2024-08-15 12:54:07 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2024-08-22 10:30:25 +0800 |
commit | 6ea25c041964bf63014fcf7bb68fb1f5a0a4e123 (patch) | |
tree | a9c2b0a496c098a0035099637aa38186363b7fc1 /gcc | |
parent | f155534979e367c189d5210daa54af93c39ce683 (diff) | |
download | gcc-6ea25c041964bf63014fcf7bb68fb1f5a0a4e123.zip gcc-6ea25c041964bf63014fcf7bb68fb1f5a0a4e123.tar.gz gcc-6ea25c041964bf63014fcf7bb68fb1f5a0a4e123.tar.bz2 |
Align ix86_{move_max,store_max} with vectorizer.
When none of mprefer-vector-width, avx256_optimal/avx128_optimal,
avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will
set ix86_{move_max,store_max} as max available vector length except
for AVX part.
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
&& TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
opts->x_ix86_move_max = PVW_AVX512;
else
opts->x_ix86_move_max = PVW_AVX128;
So for -mavx2, vectorizer will choose 256-bit for vectorization, but
128-bit is used for struct copy, there could be a potential STLF issue
due to this "misalign".
The patch fixes that.
gcc/ChangeLog:
* config/i386/i386-options.cc (ix86_option_override_internal):
set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX
instead of PVW_AVX128.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
* gcc.target/i386/pieces-memcpy-6.c: Ditto.
* gcc.target/i386/pieces-memset-38.c: Ditto.
* gcc.target/i386/pieces-memset-40.c: Ditto.
* gcc.target/i386/pieces-memset-41.c: Ditto.
* gcc.target/i386/pieces-memset-42.c: Ditto.
* gcc.target/i386/pieces-memset-43.c: Ditto.
* gcc.target/i386/pieces-strcpy-2.c: Ditto.
* gcc.target/i386/pieces-memcpy-22.c: New test.
* gcc.target/i386/pieces-memset-51.c: New test.
* gcc.target/i386/pieces-strcpy-3.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-options.cc | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 |
12 files changed, 53 insertions, 8 deletions
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index f423455..f79257c 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_move_max = PVW_AVX256; else opts->x_ix86_move_max = PVW_AVX128; } @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_store_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_store_max = PVW_AVX256; else opts->x_ix86_store_max = PVW_AVX128; } diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c index 5faee21..53ad0b3 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c new file mode 100644 index 0000000..605b362 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index 5f99cc9..cfd2a86 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c index ed4a24a..ddd194d 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c index 86358c9..5878876 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. */ /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c index d7a27f5..27a6c8a 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ /* Cope with --enable-frame-pointer. */ /* { dg-additional-options "-fomit-frame-pointer" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c index df0c122..103da69 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c index 2f2179c..f1494e1 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c new file mode 100644 index 0000000..192ec0d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst; + +void +foo (int x) +{ + __builtin_memset (dst, x, 64); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 90446ed..9bb94b7 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *strcpy (char *, const char *); diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c new file mode 100644 index 0000000..df7571b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ |