diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2013-11-12 13:26:51 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2013-11-12 05:26:51 -0800 |
commit | cd3c1b1c7065fa81203bc3c0daf4e81104377e2a (patch) | |
tree | 76b7d3ee8a4b157a605bf72f38d720713f152b4e /gcc | |
parent | 732dad8f32152ed7cbb2d4cb8ba0fc21f608fe70 (diff) | |
download | gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.zip gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.tar.gz gcc-cd3c1b1c7065fa81203bc3c0daf4e81104377e2a.tar.bz2 |
Properly handle AVX256 unaligned load and store
PR target/59084
* config/i386/i386.c (ix86_option_override_internal): Check
X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL and
X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL for
MASK_AVX256_SPLIT_UNALIGNED_LOAD and
MASK_AVX256_SPLIT_UNALIGNED_STORE.
* config/i386/x86-tune.def (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL):
Clear m_COREI7_AVX and update comments.
(X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): Likewise.
From-SVN: r204700
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 4 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 10 |
3 files changed, 20 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4a8b2e1..3cdf247 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2013-11-12 H.J. Lu <hongjiu.lu@intel.com> + + PR target/59084 + * config/i386/i386.c (ix86_option_override_internal): Check + X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL and + X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL for + MASK_AVX256_SPLIT_UNALIGNED_LOAD and + MASK_AVX256_SPLIT_UNALIGNED_STORE. + + * config/i386/x86-tune.def (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL): + Clear m_COREI7_AVX and update comments. + (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): Likewise. + 2013-11-12 Martin Jambor <mjambor@suse.cz> PR rtl-optimization/10474 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8b8cdfae..924cb66 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3974,10 +3974,10 @@ ix86_option_override_internal (bool main_args_p, if (flag_expensive_optimizations && !(opts_set->x_target_flags & MASK_VZEROUPPER)) opts->x_target_flags |= MASK_VZEROUPPER; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; - if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL] + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; /* Enable 128-bit AVX instruction generation diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 23879f9..54867d2 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -376,15 +376,15 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ -/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if false, unaligned loads are split. */ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_GENERIC)) + ~(m_COREI7 | m_COREI7_AVX | m_GENERIC)) -/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if true, unaligned loads are +/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are split. */ -DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_load_optimal", - ~(m_COREI7 | m_BDVER | m_GENERIC)) +DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal", + ~(m_COREI7 | m_COREI7_AVX | m_BDVER | m_GENERIC)) /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for the auto-vectorizer. */ |