aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author H.J. Lu <hongjiu.lu@intel.com> 2013-11-12 13:26:51 +0000
committer H.J. Lu <hjl@gcc.gnu.org> 2013-11-12 05:26:51 -0800
commit cd3c1b1c7065fa81203bc3c0daf4e81104377e2a (patch)
tree 76b7d3ee8a4b157a605bf72f38d720713f152b4e /gcc
parent 732dad8f32152ed7cbb2d4cb8ba0fc21f608fe70 (diff)
download gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.zip
gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.tar.gz
gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.tar.bz2
Properly handle AVX256 unaligned load and store
PR target/59084

* config/i386/i386.c (ix86_option_override_internal): Check X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL and X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL for MASK_AVX256_SPLIT_UNALIGNED_LOAD and MASK_AVX256_SPLIT_UNALIGNED_STORE.

* config/i386/x86-tune.def (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL): Clear m_COREI7_AVX and update comments.
(X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): Likewise.

From-SVN: r204700
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 13
-rw-r--r-- gcc/config/i386/i386.c 4
-rw-r--r-- gcc/config/i386/x86-tune.def 10
3 files changed, 20 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4a8b2e1..3cdf247 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2013-11-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/59084
+ * config/i386/i386.c (ix86_option_override_internal): Check
+ X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL and
+ X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL for
+ MASK_AVX256_SPLIT_UNALIGNED_LOAD and
+ MASK_AVX256_SPLIT_UNALIGNED_STORE.
+
+ * config/i386/x86-tune.def (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL):
+ Clear m_COREI7_AVX and update comments.
+ (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): Likewise.
+
2013-11-12 Martin Jambor <mjambor@suse.cz>
PR rtl-optimization/10474
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8b8cdfae..924cb66 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3974,10 +3974,10 @@ ix86_option_override_internal (bool main_args_p,
if (flag_expensive_optimizations
&& !(opts_set->x_target_flags & MASK_VZEROUPPER))
opts->x_target_flags |= MASK_VZEROUPPER;
- if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
&& !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL]
+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
&& !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
/* Enable 128-bit AVX instruction generation
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 23879f9..54867d2 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -376,15 +376,15 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
-/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if true, unaligned loads are
+/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if false, unaligned loads are
split. */
DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal",
- ~(m_COREI7 | m_GENERIC))
+ ~(m_COREI7 | m_COREI7_AVX | m_GENERIC))
-/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if true, unaligned loads are
+/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are
split. */
-DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_load_optimal",
- ~(m_COREI7 | m_BDVER | m_GENERIC))
+DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal",
+ ~(m_COREI7 | m_COREI7_AVX | m_BDVER | m_GENERIC))
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */