aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@freesoft.cz>1999-12-10 00:54:58 +0100
committerJan Hubicka <hubicka@gcc.gnu.org>1999-12-09 23:54:58 +0000
commit309ada50deb5fe88d3b703eb36cedfe221192386 (patch)
tree0a257cc5ec6e36ddb570193a11e352fe2c6447c9
parent009fef522acfb6d3678d57bf9f5eae8fecfe381d (diff)
downloadgcc-309ada50deb5fe88d3b703eb36cedfe221192386.zip
gcc-309ada50deb5fe88d3b703eb36cedfe221192386.tar.gz
gcc-309ada50deb5fe88d3b703eb36cedfe221192386.tar.bz2
i386.md (cpu attribute): Add "athlon".
* i386.md (cpu attribute): Add "athlon". (athlon_decode): New attribute. (Athlon scheduling units definitions): New. (fcmp and shld patterns): Set athlon_decode to "vector". * i386.c (athlon_cost): New. (m_ATHLON): New. (x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_use_sahf): Set for Athlon. (x86_use_fiop): Unset for Athlon. (override_options): Define Athlon alignments and "athlon" name. (x86_adjust_cost): Penalize AGI and delayed latencies for Athlon. * i386.h (TARGET_ATHLON): New. (enum processor_type): Add PROCESSOR_ATHLON. (TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__" for CPU_DEFAULT==5 (TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon. From-SVN: r30852
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/config/i386/i386.c57
-rw-r--r--gcc/config/i386/i386.h7
-rw-r--r--gcc/config/i386/i386.md131
4 files changed, 201 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index da221ac..630761d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+Fri Dec 10 00:52:13 MET 1999 Jan Hubicka <hubicka@freesoft.cz>
+
+ * i386.md (cpu attribute): Add "athlon".
+ (athlon_decode): New attribute.
+ (Athlon scheduling units definitions): New.
+ (fcmp and shld patterns): Set athlon_decode to "vector".
+ * i386.c (athlon_cost): New.
+ (m_ATHLON): New.
+ (x86_use_leave, x86_push_memory, x86_movx, x86_cmove, x86_deep_branch,
+ x86_use_sahf): Set for Athlon.
+ (x86_use_fiop): Unset for Athlon.
+ (override_options): Define Athlon alignments and "athlon" name.
+ (x86_adjust_cost): Penalize AGI and delayed latencies for Athlon.
+ * i386.h (TARGET_ATHLON): New.
+ (enum processor_type): Add PROCESSOR_ATHLON.
+ (TARGET_CPU_DEFAULT_SPEC): Set to "-D__tune_athlon__"
+ for CPU_DEFAULT==5
+ (TARGET_CPP_CPU_SPECS): Set -D__tune_athlon__ for Athlon.
+
1999-12-09 Andreas Jaeger <aj@suse.de>
* loop.c (record_biv): Declare parameter as int.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cad1c38..d2069ee 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -155,6 +155,26 @@ struct processor_costs k6_cost = {
{4, 4, 4} /* cost of loading integer registers */
};
+struct processor_costs athlon_cost = { /* Athlon cost table; referenced from the processor table in override_options. */
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* variable shift costs */
+ 1, /* constant shift costs */
+ 5, /* cost of starting a multiply */
+ 0, /* cost of multiply per each bit set */
+ 19, /* cost of a divide/mod */
+ 8, /* "large" insn */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4} /* NOTE(review): original comment repeated "loading integer registers"; presumably the cost of storing fp registers -- confirm against struct processor_costs */
+};
+
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks. */
@@ -163,22 +183,23 @@ struct processor_costs *ix86_cost = &pentium_cost;
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
-const int x86_use_leave = m_386 | m_K6;
-const int x86_push_memory = m_386 | m_K6;
+const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
+const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
-const int x86_movx = 0 /* m_386 | m_PPRO | m_K6 */;
+const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
-const int x86_cmove = m_PPRO;
-const int x86_deep_branch = m_PPRO | m_K6;
-const int x86_use_sahf = m_PPRO | m_K6;
+const int x86_cmove = m_PPRO | m_ATHLON;
+const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
+const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
-const int x86_use_fiop = ~m_PPRO;
+const int x86_use_fiop = ~(m_PPRO | m_ATHLON); /* Unset for PPro and Athlon only, as the ChangeLog states. */
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
@@ -334,7 +355,8 @@ override_options ()
{&i486_cost, 0, 0, 4, 4, 4, 1},
{&pentium_cost, 0, 0, -4, -4, -4, 1},
{&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
- {&k6_cost, 0, 0, -5, -5, 4, 1}
+ {&k6_cost, 0, 0, -5, -5, 4, 1},
+ {&athlon_cost, 0, 0, 4, -4, 4, 1}
};
static struct pta
@@ -351,6 +373,7 @@ override_options ()
{"i686", PROCESSOR_PENTIUMPRO},
{"pentiumpro", PROCESSOR_PENTIUMPRO},
{"k6", PROCESSOR_K6},
+ {"athlon", PROCESSOR_ATHLON},
};
int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
@@ -5582,9 +5605,9 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
rtx set, set2;
int dep_insn_code_number;
- /* We describe no anti or output depenancies. */
+ /* Anti and output dependencies have zero cost on all CPUs. */
if (REG_NOTE_KIND (link) != 0)
- return cost;
+ return 0;
dep_insn_code_number = recog_memoized (dep_insn);
@@ -5660,6 +5683,20 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
cost += 5;
break;
+ case PROCESSOR_ATHLON:
+ /* Address Generation Interlocks cause problems on the Athlon CPU because
+ the loads and stores are done in order, so once one load or store has
+ to wait, the others must too.  Penalize the AGIs slightly, by one cycle.
+ We might experiment with this value later. */
+ if (ix86_agi_dependant (insn, dep_insn, insn_type))
+ cost += 1;
+
+ /* Since we can't represent delayed latencies of load+operation,
+ increase the cost here when the producer is a load that is neither TYPE_IMOV nor TYPE_FMOV. */
+ if (dep_insn_type != TYPE_IMOV
+ && dep_insn_type != TYPE_FMOV
+ && get_attr_memory (dep_insn) == MEMORY_LOAD)
+ cost += 2; /* No break: control falls through to default, which only breaks. */
default:
break;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ab62050..ee78735 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -151,6 +151,7 @@ extern int target_flags;
#define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM)
#define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
#define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
+#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
#define CPUMASK (1 << ix86_cpu)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
@@ -245,6 +246,7 @@ enum processor_type
PROCESSOR_PENTIUM,
PROCESSOR_PENTIUMPRO,
PROCESSOR_K6,
+ PROCESSOR_ATHLON,
PROCESSOR_max
};
@@ -326,6 +328,9 @@ extern int ix86_arch;
#if TARGET_CPU_DEFAULT == 4
#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__"
#endif
+#if TARGET_CPU_DEFAULT == 5
+#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__"
+#endif
#ifndef CPP_CPU_DEFAULT_SPEC
#define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__"
#endif
@@ -342,11 +347,13 @@ extern int ix86_arch;
%{march=pentiumpro|march=i686:-D__pentiumpro -D__pentiumpro__ \
%{!mcpu*:-D__tune_pentiumpro__ }}\
%{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\
+%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\
%{m386|mcpu=i386:-D__tune_i386__ }\
%{m486|mcpu=i486:-D__tune_i486__ }\
%{mpentium|mcpu=pentium|mcpu=i586:-D__tune_pentium__ }\
%{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_pentiumpro__ }\
%{mcpu=k6:-D__tune_k6__ }\
+%{mcpu=athlon:-D__tune_athlon__ }\
%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
#endif
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 475e860..cb0ed32 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -77,7 +77,7 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon"
(const (symbol_ref "ix86_cpu")))
;; A basic instruction type. Refinements due to arguments to be
@@ -693,6 +693,128 @@
(eq_attr "type" "idiv"))
17 17)
+;; AMD Athlon Scheduling
+;;
+;; The Athlon does contain three pipelined FP units, three integer units and
+;; three address generation units.
+;;
+;; The predecode logic determines the boundaries of instructions in the 64
+;; byte cache line, so the cache line straddling problem of the K6 might be an
+;; issue here as well, but it is not noted in the documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath decoder
+;; are available. They can decode three DirectPath instructions or one VectorPath
+;; instruction per cycle.
+;; Decoded macro instructions are then passed to the 72 entry instruction control
+;; unit, which passes
+;; them to the specialized integer (18 entry) and fp (36 entry) schedulers.
+;;
+;; The load/store queue unit is not attached to the schedulers but
+;; communicates with all the execution units separately instead.
+
+(define_attr "athlon_decode" "direct,vector"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
+ (and (eq_attr "type" "fmov")
+ (ior (match_operand:XF 0 "memory_operand" "")
+ (match_operand:XF 1 "memory_operand" "")))
+ (const_string "vector")]
+ (const_string "direct")))
+
+(define_function_unit "athlon_vectordec" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_decode" "vector"))
+ 1 1)
+
+(define_function_unit "athlon_directdec" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_decode" "direct"))
+ 1 1)
+
+(define_function_unit "athlon_vectordec" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_decode" "direct"))
+ 1 1 [(eq_attr "athlon_decode" "vector")])
+
+(define_function_unit "athlon_ieu" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov"))
+ 1 1)
+
+(define_function_unit "athlon_ieu" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "imul"))
+ 4 0)
+
+(define_function_unit "athlon_ieu" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "idiv"))
+ 27 0)
+
+(define_function_unit "athlon_muldiv" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "imul"))
+ 5 0) ; NOTE(review): the athlon_ieu entry for imul uses ready-delay 4, this one 5 -- confirm which latency is intended
+
+(define_function_unit "athlon_muldiv" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "idiv"))
+ 27 27)
+
+(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all"
+ (cond [(eq_attr "type" "fop,fop1,fcmp")
+ (const_string "add")
+ (eq_attr "type" "fmul,fdiv,fpspc,fsgn")
+ (const_string "mul")
+ (and (eq_attr "type" "fmov") (eq_attr "memory" "!none"))
+ (const_string "store")
+ (and (eq_attr "type" "fmov")
+ (ior (match_operand:SI 1 "register_operand" "")
+ (match_operand 1 "immediate_operand" "")))
+ (const_string "store")
+ (eq_attr "type" "fmov")
+ (const_string "muladd")
+ (eq_attr "type" "fcmov")
+ (const_string "all")]
+ (const_string "none")))
+
+(define_function_unit "athlon_fp_mul" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_fpunits" "mul,all"))
+ 4 1)
+
+(define_function_unit "athlon_fp_add" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_fpunits" "add,all"))
+ 4 1)
+
+(define_function_unit "athlon_fp_muladd" 2 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "athlon_fpunits" "muladd,mul,add,all")))
+ 2 1)
+
+(define_function_unit "athlon_fp_muladd" 2 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "!fmov")
+ (eq_attr "athlon_fpunits" "muladd,mul,add,all")))
+ 4 1)
+
+(define_function_unit "athlon_fp_store" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_fpunits" "store,all"))
+ 1 1)
+
+(define_function_unit "athlon_agu" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "athlon_fpunits" "none")))
+ 1 1)
+
+
;; Compare instructions.
;; All compare insns have expanders that save the operands away without
@@ -1095,7 +1217,8 @@
&& FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
- [(set_attr "type" "fcmp")])
+ [(set_attr "type" "fcmp")
+ (set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_iu"
[(set (reg:CCFPU 17)
@@ -1105,7 +1228,8 @@
&& FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
- [(set_attr "type" "fcmp")])
+ [(set_attr "type" "fcmp")
+ (set_attr "athlon_decode" "vector")])
;; Move instructions.
@@ -5440,6 +5564,7 @@
[(set_attr "type" "ishift")
(set_attr "length_opcode" "3")
(set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
(define_expand "x86_shift_adj_1"