author | Uros Bizjak <ubizjak@gmail.com> | 2011-07-12 21:47:31 +0200
---|---|---
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-07-12 21:47:31 +0200
commit | 3a4ffde68cfc6fee3c20d282d6690f2569e2fffa (patch) |
tree | b1c9134113b94c3249dfac2d15392db2d2908a84 /gcc |
parent | 10b75750f251ad6a5cdeb505f51ddc4638d81e14 (diff) |
download | gcc-3a4ffde68cfc6fee3c20d282d6690f2569e2fffa.zip gcc-3a4ffde68cfc6fee3c20d282d6690f2569e2fffa.tar.gz gcc-3a4ffde68cfc6fee3c20d282d6690f2569e2fffa.tar.bz2 |
i386.c: Tidy processor feature bitmasks.
* config/i386/i386.c: Tidy processor feature bitmasks.
(m_P4_NOCONA): New.
From-SVN: r176215
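In short, the patch introduces combined masks such as `m_P4_NOCONA` so that a tuning entry can name one mask instead of repeating `m_PENT4 | m_NOCONA`, and it reorders each entry into a roughly consistent Intel-then-AMD-then-generic order. The sketch below illustrates the pattern outside of GCC; the three-enumerator enum, the `tune_single_stringop` variable, and the `main` test are simplified stand-ins for this illustration, not GCC's actual definitions:

```c
#include <stdio.h>

/* Hypothetical stand-ins for GCC's PROCESSOR_* enumerators;
   the real list lives in the i386 backend and is much longer.  */
enum processor_type
{
  PROCESSOR_PENTIUM4,
  PROCESSOR_NOCONA,
  PROCESSOR_ATOM
};

/* Per-CPU bits, plus the combined mask this commit introduces.  */
#define m_PENT4     (1 << PROCESSOR_PENTIUM4)
#define m_NOCONA    (1 << PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_ATOM      (1 << PROCESSOR_ATOM)

/* A tuning entry now names the combined mask once instead of
   spelling out m_PENT4 | m_NOCONA each time (cf. the
   X86_TUNE_SINGLE_STRINGOP line in the diff below).  */
static const unsigned int tune_single_stringop = m_P4_NOCONA;

int
main (void)
{
  /* Hypothetical -mtune selection.  */
  enum processor_type ix86_tune = PROCESSOR_NOCONA;

  /* A feature applies when the selected CPU's bit is in the mask.  */
  if (tune_single_stringop & (1 << ix86_tune))
    puts ("single_stringop tuning applies to this CPU");
  return 0;
}
```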
Diffstat (limited to 'gcc')
mode | file | lines changed
---|---|---
-rw-r--r-- | gcc/ChangeLog | 5
-rw-r--r-- | gcc/config/i386/i386.c | 125
-rw-r--r-- | gcc/config/i386/i386.md | 4
3 files changed, 63 insertions, 71 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f3abf4d..f820c2d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2011-07-12  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.c: Tidy processor feature bitmasks.
+	(m_P4_NOCONA): New.
+
 2011-07-12  Andrew Pinski  <pinskia@gmail.com>
 
 	PR rtl-opt/49474
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 325d901..24c906c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1880,30 +1880,31 @@ const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_486 (1<<PROCESSOR_I486)
 #define m_PENT (1<<PROCESSOR_PENTIUM)
 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
-#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
-#define m_NOCONA (1<<PROCESSOR_NOCONA)
-#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
-#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
-#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
-#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
-#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
-#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
-#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
-#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
-#define m_ATOM (1<<PROCESSOR_ATOM)
-
-#define m_GEODE (1<<PROCESSOR_GEODE)
-#define m_K6 (1<<PROCESSOR_K6)
-#define m_K6_GEODE (m_K6 | m_GEODE)
-#define m_K8 (1<<PROCESSOR_K8)
-#define m_ATHLON (1<<PROCESSOR_ATHLON)
-#define m_ATHLON_K8 (m_K8 | m_ATHLON)
-#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_BDVER1 (1<<PROCESSOR_BDVER1)
-#define m_BDVER2 (1<<PROCESSOR_BDVER2)
-#define m_BTVER1 (1<<PROCESSOR_BTVER1)
-#define m_BDVER (m_BDVER1 | m_BDVER2)
-#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
+#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
+#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
+#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
+#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
+#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
+#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
+#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
+#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
+#define m_ATOM (1<<PROCESSOR_ATOM)
+
+#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_K6_GEODE (m_K6 | m_GEODE)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
+#define m_BDVER1 (1<<PROCESSOR_BDVER1)
+#define m_BDVER2 (1<<PROCESSOR_BDVER2)
+#define m_BDVER (m_BDVER1 | m_BDVER2)
+#define m_BTVER1 (1<<PROCESSOR_BTVER1)
+#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
 
 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
@@ -1922,18 +1923,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
      negatively, so enabling for Generic64 seems like good code size
      tradeoff.  We can't enable it for 32bit generic because it does not
      work well with PPro base chips.  */
-  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
+  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
 
   /* X86_TUNE_PUSH_MEMORY */
-  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
-  | m_NOCONA | m_CORE2I7 | m_GENERIC,
+  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
   m_486 | m_PENT,
 
   /* X86_TUNE_UNROLL_STRLEN */
-  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
-  | m_CORE2I7 | m_GENERIC,
+  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
      on simulation result. But after P4 was made, no performance benefit
@@ -1945,13 +1944,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   ~m_386,
 
   /* X86_TUNE_USE_SAHF */
-  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1
-  | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
 
   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
      partial dependencies.  */
-  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
-  | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
      register stalls on Generic32 compilation setting as well.  However
@@ -1970,13 +1967,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   m_386 | m_486 | m_K6_GEODE,
 
   /* X86_TUNE_USE_SIMODE_FIOP */
-  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
+  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
 
   /* X86_TUNE_USE_MOV0 */
   m_K6,
 
   /* X86_TUNE_USE_CLTD */
-  ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
+  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
 
   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
   m_PENT4,
@@ -1991,14 +1988,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   ~(m_PENT | m_PPRO),
 
   /* X86_TUNE_PROMOTE_QIMODE */
-  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
-  | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
+  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_FAST_PREFIX */
-  ~(m_PENT | m_486 | m_386),
+  ~(m_386 | m_486 | m_PENT),
 
   /* X86_TUNE_SINGLE_STRINGOP */
-  m_386 | m_PENT4 | m_NOCONA,
+  m_386 | m_P4_NOCONA,
 
   /* X86_TUNE_QIMODE_MATH */
   ~0,
@@ -2033,11 +2029,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
 
   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
      for DFmode copies */
-  ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
-  | m_GENERIC | m_GEODE),
+  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
 
   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
-  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
      conflict here in between PPro/Pentium4 based chips that thread 128bit
@@ -2048,14 +2043,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
      shows that disabling this option on P4 brings over 20% SPECfp regression,
      while enabling it on K8 brings roughly 2.4% regression that can be partly
      masked by careful scheduling of moves.  */
-  m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC | m_AMDFAM10
-  | m_BDVER,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
 
   /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
-  m_AMDFAM10 | m_BDVER | m_BTVER1 | m_COREI7,
+  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
 
   /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
-  m_BDVER | m_COREI7,
+  m_COREI7 | m_BDVER,
 
   /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
   m_BDVER ,
@@ -2070,16 +2064,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   m_AMD_MULTIPLE,
 
   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
-  m_PPRO | m_PENT4 | m_NOCONA,
+  m_PPRO | m_P4_NOCONA,
 
   /* X86_TUNE_MEMORY_MISMATCH_STALL */
-  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_PROLOGUE_USING_MOVE */
-  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
 
   /* X86_TUNE_EPILOGUE_USING_MOVE */
-  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
 
   /* X86_TUNE_SHIFT1 */
   ~m_486,
@@ -2095,34 +2089,31 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
      than 4 branch instructions in the 16 byte window.  */
-  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
-  | m_GENERIC,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_SCHEDULE */
-  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
-  | m_GENERIC,
+  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_USE_BT */
-  m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
+  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_USE_INCDEC */
-  ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
+  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
 
   /* X86_TUNE_PAD_RETURNS */
-  m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
+  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
 
   /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
   m_ATOM,
 
   /* X86_TUNE_EXT_80387_CONSTANTS */
-  m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
-  | m_CORE2I7 | m_GENERIC,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
 
   /* X86_TUNE_SHORTEN_X87_SSE */
   ~m_K8,
 
   /* X86_TUNE_AVOID_VECTOR_DECODE */
-  m_K8 | m_CORE2I7_64 | m_GENERIC64,
+  m_CORE2I7_64 | m_K8 | m_GENERIC64,
 
   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
      and SImode multiply, but 386 and 486 do HImode multiply faster.  */
@@ -2130,11 +2121,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
 
   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
      vector path on AMD machines.  */
-  m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER | m_BTVER1,
+  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
 
   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
      machines.  */
-  m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER | m_BTVER1,
+  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
 
   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
      than a MOV.  */
@@ -2151,7 +2142,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
 
   /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
      from FP to FP.  */
-  m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
+  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
 
   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
      from integer to FP.  */
@@ -2160,7 +2151,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
      with a subsequent conditional jump instruction into a single
      compare-and-branch uop.  */
-  m_BDVER ,
+  m_BDVER,
 
   /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
      will impact LEA instruction selection. */
@@ -2203,12 +2194,10 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
 };
 
 static const unsigned int x86_accumulate_outgoing_args
-  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
-  | m_GENERIC;
+  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
 
 static const unsigned int x86_arch_always_fancy_math_387
-  = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
-  | m_NOCONA | m_CORE2I7 | m_GENERIC;
+  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
 
 static const unsigned int x86_avx256_split_unaligned_load
   = m_COREI7 | m_GENERIC;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6026c8..0451c67 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10963,9 +10963,7 @@
    (set_attr "modrm" "0")])
 
 (define_expand "indirect_jump"
-  [(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
-  ""
-  "")
+  [(set (pc) (match_operand 0 "nonimmediate_operand" ""))])
 
 (define_insn "*indirect_jump"
   [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
```
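Because `|` is commutative and associative, and the new `m_P4_NOCONA` merely abbreviates `m_PENT4 | m_NOCONA`, each rewritten initializer should keep exactly the same bits as before. A minimal compile-time sketch of that property for one entry (X86_TUNE_PUSH_MEMORY); the bit positions here are illustrative, not GCC's real PROCESSOR_* layout, and the check itself is not part of the commit:

```c
/* Illustrative single bits; GCC's real values come from the
   PROCESSOR_* enum and the compound masks in i386.c.  */
#define m_386          (1u << 0)
#define m_PENT4        (1u << 1)
#define m_NOCONA       (1u << 2)
#define m_P4_NOCONA    (m_PENT4 | m_NOCONA)
#define m_CORE2I7      (1u << 3)
#define m_K6_GEODE     (1u << 4)
#define m_AMD_MULTIPLE (1u << 5)
#define m_GENERIC      (1u << 6)

/* The X86_TUNE_PUSH_MEMORY mask as written before and after the patch.  */
#define OLD_PUSH_MEMORY (m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 \
                         | m_NOCONA | m_CORE2I7 | m_GENERIC)
#define NEW_PUSH_MEMORY (m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE \
                         | m_AMD_MULTIPLE | m_GENERIC)

/* C11 static assertion: the tidied initializer is bit-for-bit
   identical to the old one.  */
_Static_assert (OLD_PUSH_MEMORY == NEW_PUSH_MEMORY,
                "tidied mask must keep the same bits");

int main (void) { return 0; }
```

One entry is not a pure reordering: the new X86_TUNE_INTEGER_DFMODE_MOVES mask lists `m_ATOM` twice, which is harmless since OR-ing the same bit in twice is a no-op.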