diff options
author | Jan Hubicka <jh@suse.cz> | 2001-02-28 19:34:35 +0100 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2001-02-28 18:34:35 +0000 |
commit | b4e89e2d67f984f7d90a81185aa67dfdf6a1f7a1 (patch) | |
tree | 536673f2f2aea69af643b78eb5082b1f88e53417 | |
parent | 0073023ddeaacd50dd01a7e8dc88ef4cf832904e (diff) | |
download | gcc-b4e89e2d67f984f7d90a81185aa67dfdf6a1f7a1.zip gcc-b4e89e2d67f984f7d90a81185aa67dfdf6a1f7a1.tar.gz gcc-b4e89e2d67f984f7d90a81185aa67dfdf6a1f7a1.tar.bz2 |
i386.c (pentium4_cost): New.
* i386.c (pentium4_cost): New.
(m_PENT4): New macro.
(x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_use_sahf,
x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8,
x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): Add Pentium4.
(x86_use_q_reg, x86_use_any_reg): Kill.
(override_options): Add pentium4.
(incdec_operand): Return 0 for pentium4.
(ix86_issue_rate): Add PROCESSOR_PENTIUM4 and PROCESSOR_ATHLON.
* i386.h (x86_use_q_reg, x86_use_any_reg): Kill.
(TARGET_PENTIUM4): Define.
(enum processor_type): Add PROCESSOR_PENTIUM4.
(CPP_CPU_DEFAULT_SPEC): Add pentium4 support.
* i386.md (attribute "cpu"): Add pentium4.
* invoke.texi (march): Add pentium4.
From-SVN: r40134
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 71 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 2 | ||||
-rw-r--r-- | gcc/invoke.texi | 6 |
5 files changed, 86 insertions, 23 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8ed7bbe..4db5d52 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +Wed Feb 28 19:31:42 CET 2001 Jan Hubicka <jh@suse.cz> + + * i386.c (pentium4_cost): New. + (m_PENT4): New macro. + (x86_push_memory, x86_movx,x86_cmove, x86_deep_branch, x86_use_sahf + x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8 + x86_integer_DFmode_moves, x86_partial_reg_dependency, + x86_memory_mismatch_stall): Add Pentium4 + (x86_use_q_reg, x86_use_any_reg): Kill. + (override_options): Add pentium4. + (incdec_operand): Return 0 for pentium4. + (ix86_issue_rate): Add PROCESSOR_PENTIUM4 and PROCESSOR_ATHLON. + * i386.h (x86_use_q_reg, x86_use_any_reg): Kill. + (TARGET_PENTIUM4): Define. + (enum processor_type): Add PROCESSOR_PENTIUM4. + (CPP_CPU_DEFAULT_SPEC): Add pentium4 support. + * i386.md (attribute "cpu"): Add pentium4. + * invoke.texi (march): Add pentium4. + Wed Feb 28 19:28:06 CET 2001 Jan Hubicka <jh@suse.cz> * i386.md (sse_mov?fcc*): New patterns and splitters. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index da323ab..d287d1a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -239,6 +239,38 @@ struct processor_costs athlon_cost = { 6 /* MMX or SSE register to integer */ }; +struct processor_costs pentium4_cost = { + 1, /* cost of an add instruction */ + 1, /* cost of a lea instruction */ + 8, /* variable shift costs */ + 8, /* constant shift costs */ + 30, /* cost of starting a multiply */ + 0, /* cost of multiply per each bit set */ + 112, /* cost of a divide/mod */ + 16, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 3, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of loading integer registers */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 12, /* cost of moving SSE register */ + {12, 12, 12}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 10, /* MMX or SSE register to integer */ +}; + struct processor_costs *ix86_cost = &pentium_cost; /* Processor feature/optimization bitmasks. */ @@ -248,19 +280,18 @@ struct processor_costs *ix86_cost = &pentium_cost; #define m_PPRO (1<<PROCESSOR_PENTIUMPRO) #define m_K6 (1<<PROCESSOR_K6) #define m_ATHLON (1<<PROCESSOR_ATHLON) +#define m_PENT4 (1<<PROCESSOR_PENTIUM4) const int x86_use_leave = m_386 | m_K6 | m_ATHLON; -const int x86_push_memory = m_386 | m_K6 | m_ATHLON; +const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4; const int x86_zero_extend_with_and = m_486 | m_PENT; -const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */; +const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */; const int x86_double_with_add = ~m_386; const int x86_use_bit_test = m_386; const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; -const int x86_use_q_reg = m_PENT | m_PPRO | m_K6; -const int x86_use_any_reg = m_486; -const int x86_cmove = m_PPRO | m_ATHLON; -const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON; -const int x86_use_sahf = m_PPRO | m_K6; +const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; +const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; +const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; const int x86_partial_reg_stall = m_PPRO; const int x86_use_loop = m_K6; const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); 
@@ -270,18 +301,18 @@ const int x86_read_modify_write = ~m_PENT; const int x86_read_modify = ~(m_PENT | m_PPRO); const int x86_split_long_moves = m_PPRO; const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486; -const int x86_single_stringop = m_386; +const int x86_single_stringop = m_386 | m_PENT4; const int x86_qimode_math = ~(0); const int x86_promote_qi_regs = 0; const int x86_himode_math = ~(m_PPRO); const int x86_promote_hi_regs = m_PPRO; -const int x86_sub_esp_4 = m_ATHLON | m_PPRO; -const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; -const int x86_add_esp_4 = m_ATHLON | m_K6; -const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; -const int x86_integer_DFmode_moves = ~m_ATHLON; -const int x86_partial_reg_dependency = m_ATHLON; -const int x86_memory_mismatch_stall = m_ATHLON; +const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; +const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; +const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; +const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; +const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4); +const int x86_partial_reg_dependency = m_ATHLON | m_PENT4; +const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) @@ -577,7 +608,8 @@ override_options () {&pentium_cost, 0, 0, -4, -4, -4, 1}, {&pentiumpro_cost, 0, 0, 4, -4, 4, 1}, {&k6_cost, 0, 0, -5, -5, 4, 1}, - {&athlon_cost, 0, 0, 4, -4, 4, 1} + {&athlon_cost, 0, 0, 4, -4, 4, 1}, + {&pentium4_cost, 0, 0, 2, 2, 2, 1} }; static struct pta @@ -595,6 +627,7 @@ override_options () {"pentiumpro", PROCESSOR_PENTIUMPRO}, {"k6", PROCESSOR_K6}, {"athlon", PROCESSOR_ATHLON}, + {"pentium4", PROCESSOR_PENTIUM4}, }; int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); @@ -1202,6 +1235,10 @@ incdec_operand (op, mode) register rtx op; enum machine_mode mode; { + /* On Pentium4, the inc and dec operations causes 
extra dependancy on flag + registers, since carry flag is not set. */ + if (TARGET_PENTIUM4 && !optimize_size) + return 0; if (op == const1_rtx || op == constm1_rtx) return 1; if (GET_CODE (op) != CONST_INT) @@ -6900,6 +6937,8 @@ ix86_issue_rate () return 2; case PROCESSOR_PENTIUMPRO: + case PROCESSOR_PENTIUM4: + case PROCESSOR_ATHLON: return 3; default: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 1086757..b7a74af 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -184,11 +184,12 @@ extern int target_flags; #define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO) #define TARGET_K6 (ix86_cpu == PROCESSOR_K6) #define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON) +#define TARGET_PENTIUM4 (ix86_cpu == PROCESSOR_PENTIUM4) #define CPUMASK (1 << ix86_cpu) extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; extern const int x86_use_bit_test, x86_cmove, x86_deep_branch; -extern const int x86_unroll_strlen, x86_use_q_reg, x86_use_any_reg; +extern const int x86_unroll_strlen; extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx; extern const int x86_use_loop, x86_use_fiop, x86_use_mov0; extern const int x86_use_cltd, x86_read_modify_write; @@ -204,8 +205,6 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; #define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & CPUMASK) #define TARGET_USE_BIT_TEST (x86_use_bit_test & CPUMASK) #define TARGET_UNROLL_STRLEN (x86_unroll_strlen & CPUMASK) -#define TARGET_USE_Q_REG (x86_use_q_reg & CPUMASK) -#define TARGET_USE_ANY_REG (x86_use_any_reg & CPUMASK) /* For sane SSE instruction set generation we need fcomi instruction. It is safe to enable all CMOVE instructions. 
*/ #define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE) @@ -345,6 +344,7 @@ enum processor_type PROCESSOR_PENTIUMPRO, PROCESSOR_K6, PROCESSOR_ATHLON, + PROCESSOR_PENTIUM4, PROCESSOR_max }; @@ -431,6 +431,9 @@ extern int ix86_arch; #if TARGET_CPU_DEFAULT == 5 #define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__" #endif +#if TARGET_CPU_DEFAULT == 6 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__" +#endif #ifndef CPP_CPU_DEFAULT_SPEC #define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__" #endif @@ -449,12 +452,14 @@ extern int ix86_arch; %{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\ %{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\ %{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\ +%{mpentium4=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\ %{m386|mcpu=i386:-D__tune_i386__ }\ %{m486|mcpu=i486:-D__tune_i486__ }\ %{mpentium|mcpu=pentium|mcpu=i586:-D__tune_i586__ -D__tune_pentium__ }\ %{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_i686__ -D__tune_pentiumpro__ }\ %{mcpu=k6:-D__tune_k6__ }\ %{mcpu=athlon:-D__tune_athlon__ }\ +%{mcpu=pentium4:-D__tune_pentium4__ }\ %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" #endif diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e6257a7..aa66d8e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -97,7 +97,7 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4" (const (symbol_ref "ix86_cpu"))) ;; A basic instruction type. 
Refinements due to arguments to be diff --git a/gcc/invoke.texi b/gcc/invoke.texi index 9804d0d..c95e774 100644 --- a/gcc/invoke.texi +++ b/gcc/invoke.texi @@ -6534,14 +6534,14 @@ These @samp{-m} options are defined for the i386 family of computers: Assume the defaults for the machine type @var{cpu type} when scheduling instructions. The choices for @var{cpu type} are @samp{i386}, @samp{i486}, @samp{i586}, @samp{i686}, @samp{pentium}, -@samp{pentiumpro}, @samp{k6}, and @samp{athlon} +@samp{pentiumpro}, @samp{pentium4}, @samp{k6}, and @samp{athlon} While picking a specific @var{cpu type} will schedule things appropriately for that particular chip, the compiler will not generate any code that does not run on the i386 without the @samp{-march=@var{cpu type}} option being used. @samp{i586} is equivalent to @samp{pentium} and @samp{i686} -is equivalent to @samp{pentiumpro}. @samp{k6} is the AMD chip as -opposed to the Intel ones. +is equivalent to @samp{pentiumpro}. @samp{k6} and @samp{athlon} are the +AMD chips as opposed to the Intel ones. @item -march=@var{cpu type} Generate instructions for the machine type @var{cpu type}. The choices |