aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jan Hubicka <jh@suse.cz> 2022-03-29 13:59:14 +0200
committer Jan Hubicka <jh@suse.cz> 2022-03-29 13:59:14 +0200
commit 871266756c7aa6a68e36b89d8d56e3fa593cca5b (patch)
tree 94750ec4f5aa2be61f85a5381852155ab5675066
parent c5db32a143604f44672cfe312ffc6edfd5c86a4a (diff)
download gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.zip
 gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.tar.gz
 gcc-871266756c7aa6a68e36b89d8d56e3fa593cca5b.tar.bz2
Disable gathers for znver3 for vectors with 2 or 4 elements
gcc/ChangeLog:

2022-03-28  Jan Hubicka  <hubicka@ucw.cz>

	* config/i386/i386-builtins.cc (ix86_vectorize_builtin_gather): Test
	TARGET_USE_GATHER_2PARTS and TARGET_USE_GATHER_4PARTS.
	* config/i386/i386.h (TARGET_USE_GATHER_2PARTS): New macro.
	(TARGET_USE_GATHER_4PARTS): New macro.
	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): New tune.
	(X86_TUNE_USE_GATHER_4PARTS): New tune.
-rw-r--r-- gcc/config/i386/i386-builtins.cc 7
-rw-r--r-- gcc/config/i386/i386.h 4
-rw-r--r-- gcc/config/i386/x86-tune.def 13
3 files changed, 22 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 2570501..8c6d0fe 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1785,7 +1785,12 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
bool si;
enum ix86_builtins code;
- if (! TARGET_AVX2 || !TARGET_USE_GATHER)
+ if (! TARGET_AVX2
+ || (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 2u)
+ ? !TARGET_USE_GATHER_2PARTS
+ : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
+ ? !TARGET_USE_GATHER_4PARTS
+ : !TARGET_USE_GATHER)))
return NULL_TREE;
if ((TREE_CODE (index_type) != INTEGER_TYPE
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b929551..363082b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -390,6 +390,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SLOW_PSHUFB]
#define TARGET_AVOID_4BYTE_PREFIXES \
ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES]
+#define TARGET_USE_GATHER_2PARTS \
+ ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS]
+#define TARGET_USE_GATHER_4PARTS \
+ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
#define TARGET_USE_GATHER \
ix86_tune_features[X86_TUNE_USE_GATHER]
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 82ca0ae..d983e2f 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -464,7 +464,18 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
| m_INTEL)
-/* X86_TUNE_USE_GATHER: Use gather instructions. */
+/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
+ elements. */
+DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC))
+
+/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
+ elements. */
+DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_GENERIC))
+
+/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
+ elements. */
DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
~(m_ZNVER1 | m_ZNVER2 | m_ALDERLAKE | m_GENERIC))