about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2021-08-26 05:31:50 -0700
committer: liuhongt <hongtao.liu@intel.com> 2021-09-13 19:55:29 +0800
commit: 5b01bfeb8703c264ad402b77741f06f41d7fceac (patch)
tree: d883af0149614d24c7c7aa237291f8153c91b8bc /gcc
parent: c8e4cb8adf68730357888adf186ab2e686a220ad (diff)
download: gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.zip
download: gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.tar.gz
download: gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.tar.bz2
x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES
1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation
   with 256-bit AVX instructions.
2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
   operations with 256-bit AVX instructions.

They are enabled only for Intel Alder Lake and Intel processors with
AVX512.

gcc/

    PR target/101935
    * config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
    (TARGET_AVX256_STORE_BY_PIECES): Likewise.
    (MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
    TARGET_AVX256_STORE_BY_PIECES instead of
    TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
    TARGET_AVX256_SPLIT_UNALIGNED_STORE.
    (STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead of
    TARGET_AVX256_SPLIT_UNALIGNED_STORE.
    * config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
    (X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.

gcc/testsuite/

    PR target/101935
    * g++.target/i386/pr80566-1.C: Add
    -mtune-ctrl=avx256_store_by_pieces.
    * gcc.target/i386/pr100865-4a.c: Likewise.
    * gcc.target/i386/pr100865-10a.c: Likewise.
    * gcc.target/i386/pr90773-20.c: Likewise.
    * gcc.target/i386/pr90773-21.c: Likewise.
    * gcc.target/i386/pr90773-22.c: Likewise.
    * gcc.target/i386/pr90773-23.c: Likewise.
    * g++.target/i386/pr80566-2.C: Add
    -mtune-ctrl=avx256_move_by_pieces.
    * gcc.target/i386/eh_return-1.c: Likewise.
    * gcc.target/i386/pr90773-26.c: Likewise.
    * gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
    with -mtune-ctrl=avx256_move_by_pieces.
    * gcc.target/i386/pieces-memcpy-15.c: Likewise.
    * gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
    with -mtune-ctrl=avx256_store_by_pieces.
    * gcc.target/i386/pieces-memset-5.c: Likewise.
    * gcc.target/i386/pieces-memset-11.c: Likewise.
    * gcc.target/i386/pieces-memset-14.c: Likewise.
    * gcc.target/i386/pieces-memset-20.c: Likewise.
    * gcc.target/i386/pieces-memset-23.c: Likewise.
    * gcc.target/i386/pieces-memset-29.c: Likewise.
    * gcc.target/i386/pieces-memset-30.c: Likewise.
    * gcc.target/i386/pieces-memset-33.c: Likewise.
    * gcc.target/i386/pieces-memset-34.c: Likewise.
    * gcc.target/i386/pieces-memset-44.c: Likewise.
    * gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
    with -mtune-ctrl=avx256_store_by_pieces.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.h 10
-rw-r--r-- gcc/config/i386/x86-tune.def 11
-rw-r--r-- gcc/testsuite/g++.target/i386/pr80566-1.C 2
-rw-r--r-- gcc/testsuite/g++.target/i386/pr80566-2.C 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/eh_return-1.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-11.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-14.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-2.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-20.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-23.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-29.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-30.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-33.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-34.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-37.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-44.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pieces-memset-5.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100865-10a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100865-4a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr90773-20.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr90773-21.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr90773-22.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr90773-23.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr90773-26.c 2
26 files changed, 42 insertions, 27 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 73237b8..e76bb55 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
#define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
+#define TARGET_AVX256_MOVE_BY_PIECES \
+ ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
+#define TARGET_AVX256_STORE_BY_PIECES \
+ ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
#define TARGET_AVX256_SPLIT_REGS \
ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
#define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -1793,8 +1797,8 @@ typedef struct ix86_args {
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
- && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
- && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ && (TARGET_AVX256_MOVE_BY_PIECES \
+ || TARGET_AVX256_STORE_BY_PIECES)) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@@ -1811,7 +1815,7 @@ typedef struct ix86_args {
? 64 \
: ((TARGET_AVX \
&& !TARGET_PREFER_AVX128 \
- && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+ && TARGET_AVX256_STORE_BY_PIECES) \
? 32 \
: ((TARGET_SSE2 \
&& TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 8f55da8..2f221b1 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
instructions in the auto-vectorizer. */
DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
+/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
+ AVX instructions. */
+DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
+ m_ALDERLAKE | m_CORE_AVX512)
+
+/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
+ AVX instructions. */
+DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
+ m_ALDERLAKE | m_CORE_AVX512)
+
+/*****************************************************************************/
/*****************************************************************************/
/* Historical relics: tuning flags that helps a specific old CPU designs */
/*****************************************************************************/
diff --git a/gcc/testsuite/g++.target/i386/pr80566-1.C b/gcc/testsuite/g++.target/i386/pr80566-1.C
index 753f974..29da31d 100644
--- a/gcc/testsuite/g++.target/i386/pr80566-1.C
+++ b/gcc/testsuite/g++.target/i386/pr80566-1.C
@@ -1,5 +1,5 @@
// { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_store_by_pieces" }
#include <cstring>
diff --git a/gcc/testsuite/g++.target/i386/pr80566-2.C b/gcc/testsuite/g++.target/i386/pr80566-2.C
index 2a2e82d..9ffd2c8 100644
--- a/gcc/testsuite/g++.target/i386/pr80566-2.C
+++ b/gcc/testsuite/g++.target/i386/pr80566-2.C
@@ -1,5 +1,5 @@
// { dg-do compile }
-// { dg-options "-O2 -march=haswell" }
+// { dg-options "-O2 -march=haswell -mtune-ctrl=avx256_move_by_pieces" }
#include <cstring>
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
index 671ba63..b21fd75 100644
--- a/gcc/testsuite/gcc.target/i386/eh_return-1.c
+++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=haswell -mno-avx512f" } */
+/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
struct _Unwind_Context
{
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
index f1432eb..8a82baf 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-12.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
extern char *dst, *src;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
index 695e8c3..4fb94ce 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-15.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_move_by_pieces" } */
extern char *dst, *src;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
index 3fb9038..3802eb7 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-11.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
index 45ece48..10bc085 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
index 649f344..4ebfc4d 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
index b8747e6..1dc4db1 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-20.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
index a3b4ffc..9232864 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-23.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
index 650e6fe..3b07a64 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-29.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
index dcec2c7..59595e6 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-30.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
index a87d1b8..6864622 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-33.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
index 0c2f1ee..52a16a0 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-34.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
index ec59497..fd09bd1 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-37.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=generic" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune-ctrl=avx256_store_by_pieces" } */
void
foo (int a1, int a2, int a3, int a4, int a5, int a6, int x, char *dst)
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
index ecc31be..5986f8e 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-44.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
index 3e95db5..e2379df 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=haswell" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10a.c b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
index 98b6dfb..1d849a3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10a.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target int128 } } */
-/* { dg-options "-O3 -march=skylake" } */
+/* { dg-options "-O3 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern __int128 array[16];
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4a.c b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
index 3654873..8609d11 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4a.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char array[64];
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-20.c b/gcc/testsuite/gcc.target/i386/pr90773-20.c
index e61e405..884a550 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-20.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-20.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 16ad17f..5bbb387 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-22.c b/gcc/testsuite/gcc.target/i386/pr90773-22.c
index 45a8ff6..245a436 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-22.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-22.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index 9256ce1..ca4a86f 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_store_by_pieces" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-26.c b/gcc/testsuite/gcc.target/i386/pr90773-26.c
index b2513c3..76fb79f 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-26.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-26.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake" } */
+/* { dg-options "-O2 -march=skylake -mtune-ctrl=avx256_move_by_pieces" } */
struct S
{