diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2021-08-26 05:31:50 -0700 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-09-13 19:55:29 +0800 |
commit | 5b01bfeb8703c264ad402b77741f06f41d7fceac (patch) | |
tree | d883af0149614d24c7c7aa237291f8153c91b8bc /gcc/config | |
parent | c8e4cb8adf68730357888adf186ab2e686a220ad (diff) | |
download | gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.zip gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.tar.gz gcc-5b01bfeb8703c264ad402b77741f06f41d7fceac.tar.bz2 |
x86: Add TARGET_AVX256_[MOVE|STORE]_BY_PIECES
1. Add TARGET_AVX256_MOVE_BY_PIECES to perform move by-pieces operation
with 256-bit AVX instructions.
2. Add TARGET_AVX256_STORE_BY_PIECES to perform move and store by-pieces
operations with 256-bit AVX instructions.
They are enabled only for Intel Alder Lake and Intel processors with
AVX512.
gcc/
PR target/101935
* config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New.
(TARGET_AVX256_STORE_BY_PIECES): Likewise.
(MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and
TARGET_AVX256_STORE_BY_PIECES instead of
TARGET_AVX256_SPLIT_UNALIGNED_LOAD and
TARGET_AVX256_SPLIT_UNALIGNED_STORE.
(STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead
of TARGET_AVX256_SPLIT_UNALIGNED_STORE.
* config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New.
(X86_TUNE_AVX256_STORE_BY_PIECES): Likewise.
gcc/testsuite/
PR target/101935
* g++.target/i386/pr80566-1.C: Add
-mtune-ctrl=avx256_store_by_pieces.
* gcc.target/i386/pr100865-4a.c: Likewise.
* gcc.target/i386/pr100865-10a.c: Likewise.
* gcc.target/i386/pr90773-20.c: Likewise.
* gcc.target/i386/pr90773-21.c: Likewise.
* gcc.target/i386/pr90773-22.c: Likewise.
* gcc.target/i386/pr90773-23.c: Likewise.
* g++.target/i386/pr80566-2.C: Add
-mtune-ctrl=avx256_move_by_pieces.
* gcc.target/i386/eh_return-1.c: Likewise.
* gcc.target/i386/pr90773-26.c: Likewise.
* gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell
with -mtune-ctrl=avx256_move_by_pieces.
* gcc.target/i386/pieces-memcpy-15.c: Likewise.
* gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell
with -mtune-ctrl=avx256_store_by_pieces.
* gcc.target/i386/pieces-memset-5.c: Likewise.
* gcc.target/i386/pieces-memset-11.c: Likewise.
* gcc.target/i386/pieces-memset-14.c: Likewise.
* gcc.target/i386/pieces-memset-20.c: Likewise.
* gcc.target/i386/pieces-memset-23.c: Likewise.
* gcc.target/i386/pieces-memset-29.c: Likewise.
* gcc.target/i386/pieces-memset-30.c: Likewise.
* gcc.target/i386/pieces-memset-33.c: Likewise.
* gcc.target/i386/pieces-memset-34.c: Likewise.
* gcc.target/i386/pieces-memset-44.c: Likewise.
* gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic
with -mtune-ctrl=avx256_store_by_pieces.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.h | 10 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 11 |
2 files changed, 18 insertions, 3 deletions
```diff
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 73237b8..e76bb55 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -403,6 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
         ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
 #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
         ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
+#define TARGET_AVX256_MOVE_BY_PIECES \
+        ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
+#define TARGET_AVX256_STORE_BY_PIECES \
+        ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
 #define TARGET_AVX256_SPLIT_REGS \
         ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -1793,8 +1797,8 @@ typedef struct ix86_args {
    ? 64 \
    : ((TARGET_AVX \
        && !TARGET_PREFER_AVX128 \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+       && (TARGET_AVX256_MOVE_BY_PIECES \
+           || TARGET_AVX256_STORE_BY_PIECES)) \
       ? 32 \
       : ((TARGET_SSE2 \
           && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@@ -1811,7 +1815,7 @@ typedef struct ix86_args {
    ? 64 \
    : ((TARGET_AVX \
        && !TARGET_PREFER_AVX128 \
-       && !TARGET_AVX256_SPLIT_UNALIGNED_STORE) \
+       && TARGET_AVX256_STORE_BY_PIECES) \
       ? 32 \
       : ((TARGET_SSE2 \
           && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 8f55da8..2f221b1 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -484,6 +484,17 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
    instructions in the auto-vectorizer.  */
 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
 
+/* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
+          m_ALDERLAKE | m_CORE_AVX512)
+
+/* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
+          m_ALDERLAKE | m_CORE_AVX512)
+
+/*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
 /*****************************************************************************/
```