diff options
author | Jan Hubicka <jh@suse.cz> | 2022-03-29 13:59:14 +0200 |
---|---|---|
committer | Jan Hubicka <jh@suse.cz> | 2022-03-29 13:59:14 +0200 |
commit | 871266756c7aa6a68e36b89d8d56e3fa593cca5b (patch) | |
tree | 94750ec4f5aa2be61f85a5381852155ab5675066 | |
parent | c5db32a143604f44672cfe312ffc6edfd5c86a4a (diff) | |
download | gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.zip gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.tar.gz gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.tar.bz2 |
Disable gathers for znver3 for vectors with 2 or 4 elements
gcc/ChangeLog:
2022-03-28 Jan Hubicka <hubicka@ucw.cz>
* config/i386/i386-builtins.cc (ix86_vectorize_builtin_gather): Test
TARGET_USE_GATHER_2PARTS and TARGET_USE_GATHER_4PARTS.
* config/i386/i386.h (TARGET_USE_GATHER_2PARTS): New macro.
(TARGET_USE_GATHER_4PARTS): New macro.
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): New tune
(X86_TUNE_USE_GATHER_4PARTS): New tune
-rw-r--r-- | gcc/config/i386/i386-builtins.cc | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 13 |
3 files changed, 22 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 2570501..8c6d0fe 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -1785,7 +1785,12 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, bool si; enum ix86_builtins code; - if (! TARGET_AVX2 || !TARGET_USE_GATHER) + if (! TARGET_AVX2 + || (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 2u) + ? !TARGET_USE_GATHER_2PARTS + : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u) + ? !TARGET_USE_GATHER_4PARTS + : !TARGET_USE_GATHER))) return NULL_TREE; if ((TREE_CODE (index_type) != INTEGER_TYPE diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b929551..363082b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -390,6 +390,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SLOW_PSHUFB] #define TARGET_AVOID_4BYTE_PREFIXES \ ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES] +#define TARGET_USE_GATHER_2PARTS \ + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] +#define TARGET_USE_GATHER_4PARTS \ + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] #define TARGET_USE_GATHER \ ix86_tune_features[X86_TUNE_USE_GATHER] #define TARGET_FUSE_CMP_AND_BRANCH_32 \ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 82ca0ae..d983e2f 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -464,7 +464,18 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_INTEL) -/* X86_TUNE_USE_GATHER: Use gather instructions. */ +/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 + elements. */ +DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC)) + +/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ +DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC)) + +/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", ~(m_ZNVER1 | m_ZNVER2 | m_ALDERLAKE | m_GENERIC)) |