diff options
author | Jan Hubicka <jh@suse.cz> | 2013-10-19 14:11:14 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2013-10-19 12:11:14 +0000 |
commit | 41ee845b75a5025e4d376d8df8661e1340b59d0a (patch) | |
tree | 432272133578a944dd10eac6df2977c235104f3b /gcc/config/i386/x86-tune.def | |
parent | 322cb62ac5c93e21a859cce27c0d8e8b1b6c1c01 (diff) | |
download | gcc-41ee845b75a5025e4d376d8df8661e1340b59d0a.zip gcc-41ee845b75a5025e4d376d8df8661e1340b59d0a.tar.gz gcc-41ee845b75a5025e4d376d8df8661e1340b59d0a.tar.bz2 |
i386.h (ACCUMULATE_OUTGOING_ARGS): Disable accumulation for cold functions.
* config/i386/i386.h (ACCUMULATE_OUTGOING_ARGS): Disable accumulation
for cold functions.
* x86-tune.def (X86_TUNE_USE_LEAVE): Update comment.
(X86_TUNE_PUSH_MEMORY): Likewise.
(X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL,
X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL): New.
(X86_TUNE_ACCUMULATE_OUTGOING_ARGS, X86_TUNE_ALWAYS_FANCY_MATH_387): New.
* i386.c (x86_accumulate_outgoing_args, x86_arch_always_fancy_math_387,
x86_avx256_split_unaligned_load, x86_avx256_split_unaligned_store):
Remove.
(ix86_option_override_internal): Update to use tune features instead
of variables.
From-SVN: r203855
Diffstat (limited to 'gcc/config/i386/x86-tune.def')
-rw-r--r-- | gcc/config/i386/x86-tune.def | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 34484a2..42eee33 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -18,15 +18,13 @@ a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
 <http://www.gnu.org/licenses/>. */
 
-/* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
-   negatively, so enabling for Generic64 seems like good code size
-   tradeoff. We can't enable it for 32bit generic because it does not
-   work well with PPro base chips. */
+/* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */
 DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
           m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
 
 /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
-   Some chips, like 486 and Pentium have problems with these sequences. */
+   Some chips, like 486 and Pentium works faster with separate load
+   and push instructions. */
 DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
           m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
           | m_GENERIC)
@@ -210,6 +208,16 @@ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
           m_COREI7 | m_BDVER | m_SLM | m_GENERIC)
 
+/* X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL: if true, unaligned loads are
+   split. */
+DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal",
+          ~(m_COREI7 | m_GENERIC))
+
+/* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if true, unaligned loads are
+   split. */
+DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_load_optimal",
+          ~(m_COREI7 | m_BDVER | m_GENERIC))
+
 /* Use packed single precision instructions where posisble. I.e. movups instead
    of movupd. */
 DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal",
@@ -398,3 +406,24 @@ DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
    fp converts to destination register. */
 DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
           m_SLM)
+
+/* X86_TUNE_ACCUMULATE_OUTGOING_ARGS: Allocate stack space for outgoing
+   arguments in prologue/epilogue instead of separately for each call
+   by push/pop instructions.
+   This increase code size by about 5% in 32bit mode, less so in 64bit mode
+   because parameters are passed in registers. It is considerable
+   win for targets without stack engine that prevents multple push operations
+   to happen in parallel.
+
+   FIXME: the flags is incorrectly enabled for amdfam10, Bulldozer,
+   Bobcat and Generic. This is because disabling it causes large
+   regression on mgrid due to IRA limitation leading to unecessary
+   use of the frame pointer in 32bit mode. */
+DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
+          m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC)
+
+/* X86_TUNE_ALWAYS_FANCY_MATH_387: controls use of fancy 387 operations,
+   such as fsqrt, fprem, fsin, fcos, fsincos etc.
+   Should be enabled for all targets that always has coprocesor. */
+DEF_TUNE (X86_TUNE_ALWAYS_FANCY_MATH_387, "always_fancy_math_387",
+          ~(m_386 | m_486))