diff options
author | Jan Hubicka <hubicka@freesoft.cz> | 1999-12-10 00:54:58 +0100 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 1999-12-09 23:54:58 +0000 |
commit | 309ada50deb5fe88d3b703eb36cedfe221192386 (patch) | |
tree | 0a257cc5ec6e36ddb570193a11e352fe2c6447c9 | |
parent | 009fef522acfb6d3678d57bf9f5eae8fecfe381d (diff) | |
download | gcc-309ada50deb5fe88d3b703eb36cedfe221192386.zip gcc-309ada50deb5fe88d3b703eb36cedfe221192386.tar.gz gcc-309ada50deb5fe88d3b703eb36cedfe221192386.tar.bz2 |
i386.md (cpu attribute): Add "athlon".
* i386.md (cpu attribute): Add "athlon".
(athlon_decode): New attribute.
(Athlon scheduling units definitions): New.
(fcmp and shld patterns): Set athlon_decode to "vector".
* i386.c (athlon_cost): New.
(m_ATHLON): New.
(x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
x86_use_sahf): Set for Athlon.
(x86_use_fiop): Unset for Athlon.
(override_options): Define Athlon alignments and "athlon" name.
(x86_adjust_cost): Penalize AGI and delayed latencies for Athlon.
* i386.h (TARGET_ATHLON): New.
(enum processor_type): Add PROCESSOR_ATHLON.
(TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__"
for CPU_DEFAULT==5
(TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon.
From-SVN: r30852
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 57 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 131 |
4 files changed, 201 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index da221ac..630761d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +Fri Dec 10 00:52:13 MET 1999 Jan Hubicka <hubicka@freesoft.cz> + + * i386.md (cpu attribute): Add "athlon". + (athlon_decode): New attribute. + (Athlon scheduling units definitions): New. + (fcmp and shld patterns): Set athlon_decode to "vector". + * i386.c (athlon_cost): New. + (m_ATHLON): New. + (x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, + x86_use_sahf): Set for Athlon. + (x86_use_fiop): Unset for Athlon. + (override_options): Define Athlon alignments and "athlon" name. + (x86_adjust_cost): Penalize AGI and delayed latencies for Athlon. + * i386.h (TARGET_ATHLON): New. + (enum processor_type): Add PROCESSOR_ATHLON. + (TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__" + for CPU_DEFAULT==5 + (TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon. + 1999-12-09 Andreas Jaeger <aj@suse.de> * loop.c (record_biv): Declare parameter as int. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cad1c38..d2069ee 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -155,6 +155,26 @@ struct processor_costs k6_cost = { {4, 4, 4} /* cost of loading integer registers */ }; +struct processor_costs athlon_cost = { + 1, /* cost of an add instruction */ + 1, /* cost of a lea instruction */ + 1, /* variable shift costs */ + 1, /* constant shift costs */ + 5, /* cost of starting a multiply */ + 0, /* cost of multiply per each bit set */ + 19, /* cost of a divide/mod */ + 8, /* "large" insn */ + 4, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {2, 3, 2}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4} /* cost of loading integer registers */ +}; + struct processor_costs *ix86_cost = &pentium_cost; /* Processor feature/optimization bitmasks. */ @@ -163,22 +183,23 @@ struct processor_costs *ix86_cost = &pentium_cost; #define m_PENT (1<<PROCESSOR_PENTIUM) #define m_PPRO (1<<PROCESSOR_PENTIUMPRO) #define m_K6 (1<<PROCESSOR_K6) +#define m_ATHLON (1<<PROCESSOR_ATHLON) -const int x86_use_leave = m_386 | m_K6; -const int x86_push_memory = m_386 | m_K6; +const int x86_use_leave = m_386 | m_K6 | m_ATHLON; +const int x86_push_memory = m_386 | m_K6 | m_ATHLON; const int x86_zero_extend_with_and = m_486 | m_PENT; -const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */; +const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */; const int x86_double_with_add = ~m_386; const int x86_use_bit_test = m_386; const int x86_unroll_strlen = m_486 | m_PENT; const int x86_use_q_reg = m_PENT | m_PPRO | m_K6; const int x86_use_any_reg = m_486; -const int x86_cmove = m_PPRO; -const int x86_deep_branch = m_PPRO | m_K6; -const int x86_use_sahf = m_PPRO | m_K6; +const int x86_cmove = m_PPRO | m_ATHLON; +const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON; +const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON; const int x86_partial_reg_stall = m_PPRO; const int x86_use_loop = m_K6; -const int x86_use_fiop = ~m_PPRO; +const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); const int x86_use_mov0 = m_K6; const int x86_use_cltd = ~(m_PENT | m_K6); const int x86_read_modify_write = ~m_PENT; @@ -334,7 +355,8 @@ override_options () {&i486_cost, 0, 0, 4, 4, 4, 1}, {&pentium_cost, 0, 0, -4, -4, -4, 1}, {&pentiumpro_cost, 0, 0, 4, -4, 4, 1}, - {&k6_cost, 0, 0, -5, -5, 4, 1} + {&k6_cost, 0, 0, -5, -5, 4, 1}, + {&athlon_cost, 0, 0, 4, -4, 4, 1} }; static struct pta @@ -351,6 +373,7 @@ override_options () {"i686", 
PROCESSOR_PENTIUMPRO}, {"pentiumpro", PROCESSOR_PENTIUMPRO}, {"k6", PROCESSOR_K6}, + {"athlon", PROCESSOR_ATHLON}, }; int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta); @@ -5582,9 +5605,9 @@ ix86_adjust_cost (insn, link, dep_insn, cost) rtx set, set2; int dep_insn_code_number; - /* We describe no anti or output depenancies. */ + /* Anti and output depenancies have zero cost on all CPUs. */ if (REG_NOTE_KIND (link) != 0) - return cost; + return 0; dep_insn_code_number = recog_memoized (dep_insn); @@ -5660,6 +5683,20 @@ ix86_adjust_cost (insn, link, dep_insn, cost) cost += 5; break; + case PROCESSOR_ATHLON: + /* Address Generation Interlock cause problems on the Athlon CPU because + the loads and stores are done in order so once one load or store has + to wait, others must too, so penalize the AGIs slightly by one cycle. + We might experiment with this value later. */ + if (ix86_agi_dependant (insn, dep_insn, insn_type)) + cost += 1; + + /* Since we can't represent delayed latencies of load+operation, + increase the cost here for non-imov insns. 
*/ + if (dep_insn_type != TYPE_IMOV + && dep_insn_type != TYPE_FMOV + && get_attr_memory (dep_insn) == MEMORY_LOAD) + cost += 2; default: break; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index ab62050..ee78735 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -151,6 +151,7 @@ extern int target_flags; #define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM) #define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO) #define TARGET_K6 (ix86_cpu == PROCESSOR_K6) +#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON) #define CPUMASK (1 << ix86_cpu) extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; @@ -245,6 +246,7 @@ enum processor_type PROCESSOR_PENTIUM, PROCESSOR_PENTIUMPRO, PROCESSOR_K6, + PROCESSOR_ATHLON, PROCESSOR_max }; @@ -326,6 +328,9 @@ extern int ix86_arch; #if TARGET_CPU_DEFAULT == 4 #define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__" #endif +#if TARGET_CPU_DEFAULT == 5 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__" +#endif #ifndef CPP_CPU_DEFAULT_SPEC #define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__" #endif @@ -342,11 +347,13 @@ extern int ix86_arch; %{march=pentiumpro|march=i686:-D__pentiumpro -D__pentiumpro__ \ %{!mcpu*:-D__tune_pentiumpro__ }}\ %{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\ +%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\ %{m386|mcpu=i386:-D__tune_i386__ }\ %{m486|mcpu=i486:-D__tune_i486__ }\ %{mpentium|mcpu=pentium|mcpu=i586:-D__tune_pentium__ }\ %{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_pentiumpro__ }\ %{mcpu=k6:-D__tune_k6__ }\ +%{mcpu=athlon:-D__tune_athlon__ }\ %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" #endif diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 475e860..cb0ed32 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -77,7 +77,7 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. 
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon" (const (symbol_ref "ix86_cpu"))) ;; A basic instruction type. Refinements due to arguments to be @@ -693,6 +693,128 @@ (eq_attr "type" "idiv")) 17 17) +;; AMD Athlon Scheduling +;; +;; The Athlon does contain three pipelined FP units, three integer units and +;; three address generation units. +;; +;; The predecode logic is determining boundaries of instructions in the 64 +;; byte cache line. So the cache line straddling problem of K6 might be issue +;; here as well, but it is not noted in the documentation. +;; +;; Three DirectPath instructions decoders and only one VectorPath decoder +;; is available. They can decode three DirectPath instructions or one VectorPath +;; instruction per cycle. +;; Decoded macro instructions are then passed to 72 entry instruction control +;; unit, that passes +;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. +;; +;; The load/store queue unit is not attached to the schedulers but +;; communicates with all the execution units seperately instead. 
+ +(define_attr "athlon_decode" "direct,vector" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") + (and (eq_attr "type" "fmov") + (ior (match_operand:XF 0 "memory_operand" "") + (match_operand:XF 1 "memory_operand" ""))) + (const_string "vector")] + (const_string "direct"))) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "vector")) + 1 1) + +(define_function_unit "athlon_directdec" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1 [(eq_attr "athlon_decode" "vector")]) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov")) + 1 1) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 4 0) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 27 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 27 27) + +(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all" + (cond [(eq_attr "type" "fop,fop1,fcmp") + (const_string "add") + (eq_attr "type" "fmul,fdiv,fpspc,fsgn") + (const_string "mul") + (and (eq_attr "type" "fmov") (eq_attr "memory" "!none")) + (const_string "store") + (and (eq_attr "type" "fmov") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "store") + (eq_attr "type" "fmov") + (const_string "muladd") + (eq_attr "type" "fcmov") + (const_string 
"all")] + (const_string "none"))) + +(define_function_unit "athlon_fp_mul" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "mul,all")) + 4 1) + +(define_function_unit "athlon_fp_add" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "add,all")) + 4 1) + +(define_function_unit "athlon_fp_muladd" 2 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "athlon_fpunits" "muladd,mul,add,all"))) + 2 1) + +(define_function_unit "athlon_fp_muladd" 2 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "!fmov") + (eq_attr "athlon_fpunits" "muladd,mul,add,all"))) + 4 1) + +(define_function_unit "athlon_fp_store" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "store,all")) + 1 1) + +(define_function_unit "athlon_agu" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "memory" "!none") + (eq_attr "athlon_fpunits" "none"))) + 1 1) + + ;; Compare instructions. ;; All compare insns have expanders that save the operands away without @@ -1095,7 +1217,8 @@ && FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp")]) + [(set_attr "type" "fcmp") + (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu" [(set (reg:CCFPU 17) @@ -1105,7 +1228,8 @@ && FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "fcmp")]) + [(set_attr "type" "fcmp") + (set_attr "athlon_decode" "vector")]) ;; Move instructions. @@ -5440,6 +5564,7 @@ [(set_attr "type" "ishift") (set_attr "length_opcode" "3") (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_expand "x86_shift_adj_1" |