diff options
author | Jan Hubicka <jh@suse.cz> | 2013-09-21 22:59:10 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2013-09-21 20:59:10 +0000 |
commit | 0ca6c49ff1069176f2fd34e3e13ed44aaea23bdb (patch) | |
tree | 70bc44cb139cf493dad73437141afad954c192ed | |
parent | 765c1354c7c9a81e9d78504a8db051ca23df7b92 (diff) | |
download | gcc-0ca6c49ff1069176f2fd34e3e13ed44aaea23bdb.zip gcc-0ca6c49ff1069176f2fd34e3e13ed44aaea23bdb.tar.gz gcc-0ca6c49ff1069176f2fd34e3e13ed44aaea23bdb.tar.bz2 |
x86-tune.def (partial_reg_stall): Disable for CoreI7 and newer.
* x86-tune.def (partial_reg_stall): Disable for CoreI7 and newer.
(sse_typeless_stores): Enable for core.
(sse_load0_by_pxor): Likewise.
(four_jump_limit): Disable for core.
(pad_returns): Likewise.
(avoid_vector_decode): Likewise.
(fuse_cmp_and_branch): Enable for cores.
* i386.c (x86_accumulate_outgoing_args): Disable for cores.
From-SVN: r202813
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 16 |
3 files changed, 21 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6f7454e..a4dbd39 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2013-09-20 Jan Hubicka <jh@suse.cz> + + * x86-tune.def (partial_reg_stall): Disable for CoreI7 and newer. + (sse_typeless_stores): Enable for core + (sse_load0_by_pxor): Likewise. + (four_jump_limit): Disable for core. + (pad_returns): Likewise. + (avoid_vector_decode): Likewise. + (fuse_cmp_and_branch): Enable for cores. + * i386.c (x86_accumulate_outgoing_args): Disable for cores. + 2013-09-20 John David Anglin <danglin@gcc.gnu.org> PR middle-end/56791 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 46c37d8..0c10e73 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1899,7 +1899,7 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { }; static const unsigned int x86_accumulate_outgoing_args - = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC; + = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC; diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index fc19df1..5f2e897 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -52,7 +52,7 @@ DEF_TUNE (X86_TUNE_MOVX, "movx", and can happen in caller/callee saving sequences. */ DEF_TUNE (X86_TUNE_PARTIAL_REG_STALL, "partial_reg_stall", m_PPRO) DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", - m_CORE_ALL | m_GENERIC) + m_CORE2 | m_GENERIC) /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall * on 16-bit immediate moves into memory on Core2 and Corei7. 
*/ DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_GENERIC) @@ -125,8 +125,10 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim maintain just lower part of scalar values in proper format leaving the upper part undefined. */ DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8) -DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", m_AMD_MULTIPLE) -DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA) +DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", + m_AMD_MULTIPLE | m_CORE_ALL) +DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", + m_PPRO | m_P4_NOCONA | m_CORE_ALL) DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) DEF_TUNE (X86_TUNE_PROLOGUE_USING_MOVE, "prologue_using_move", @@ -144,7 +146,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more than 4 branch instructions in the 16 byte window. 
*/ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM| m_AMD_MULTIPLE + m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC) DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE @@ -154,13 +156,13 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt", DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC)) DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", - m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC) + m_AMD_MULTIPLE | m_GENERIC) DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_ATOM) DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC) DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", - m_CORE_ALL | m_K8 | m_GENERIC) + m_K8 | m_GENERIC) /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */ DEF_TUNE (X86_TUNE_PROMOTE_HIMODE_IMUL, "promote_himode_imul", @@ -193,7 +195,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction with a subsequent conditional jump instruction into a single compare-and-branch uop. */ -DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER) +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER | m_CORE_ALL) /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM) |