From b54cf83ae659949de888bb3ec2797b7858e6ee1a Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Sat, 15 Feb 2003 21:19:01 +0000 Subject: rs6000.h (processor_type): Add PPC440. * config/rs6000/rs6000.h (processor_type): Add PPC440. * config/rs6000/rs6000.c (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE, TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD, TARGET_SCHED_VARIABLE_ISSUE): Define. (rs6000_use_dfa_pipeline_interface): New function. (rs6000_multipass_dfa_lookahead): New Function. (rs6000_variable_issue): New function. (rs6000_adjust_cost): Add CMP and DELAYED_CR types. (rs6000_issue_rate): Add PPC440. * config/rs6000/rs6000.md (unspec list): Correct typo. (attr "type"): Add load_ext, load_ext_u, load_ext_ux, load_u, store_ux, store_u, fpload_ux, fpload_u, fpstore_ux, fpstore_u, cmp, delayed_cr, mfcr, mtcr. (automata_option): Set "ndfa". (extendMMNN2): Update attributes. (movcc_internal1): Discourage move to non-cr0. Update attributes. (movMM_update): Update attributes. (cmpMM_internal): Update attributes. (sCC CR materialization): Update attributes. (branch patterns): Do not discourage non-cr0. (cr logical patterns): Prefer destructive register allocation. Update attributes. (movesi_from_cr): Update attribute. (mtcrf_operation): Update attribute. (mtcrfsi): Update attribute. * config/rs6000/40x.md: New file. * config/rs6000/603.md: New file. * config/rs6000/6xx.md: New file. * config/rs6000/7450.md: New file. * config/rs6000/7xx.md: New file. * config/rs6000/mpc.md: New file. * config/rs6000/power4.md: New file. * config/rs6000/rios1.md: New file. * config/rs6000/rios2.md: New file. * config/rs6000/rs64.md: New file. 
[Some DFA descriptions based on work by Michael Hayes] From-SVN: r62943 --- gcc/ChangeLog | 40 ++ gcc/config/rs6000/40x.md | 67 ++++ gcc/config/rs6000/603.md | 107 +++++ gcc/config/rs6000/6xx.md | 203 ++++++++++ gcc/config/rs6000/7450.md | 137 +++++++ gcc/config/rs6000/7xx.md | 142 +++++++ gcc/config/rs6000/mpc.md | 79 ++++ gcc/config/rs6000/power4.md | 281 +++++++++++++ gcc/config/rs6000/rios1.md | 144 +++++++ gcc/config/rs6000/rios2.md | 82 ++++ gcc/config/rs6000/rs6000.c | 68 +++- gcc/config/rs6000/rs6000.h | 1 + gcc/config/rs6000/rs6000.md | 947 ++++---------------------------------------- gcc/config/rs6000/rs64.md | 103 +++++ 14 files changed, 1537 insertions(+), 864 deletions(-) create mode 100644 gcc/config/rs6000/40x.md create mode 100644 gcc/config/rs6000/603.md create mode 100644 gcc/config/rs6000/6xx.md create mode 100644 gcc/config/rs6000/7450.md create mode 100644 gcc/config/rs6000/7xx.md create mode 100644 gcc/config/rs6000/mpc.md create mode 100644 gcc/config/rs6000/power4.md create mode 100644 gcc/config/rs6000/rios1.md create mode 100644 gcc/config/rs6000/rios2.md create mode 100644 gcc/config/rs6000/rs64.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bba56b8..adb0f78 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2003-02-15 David Edelsohn + + * config/rs6000/rs6000.h (processor_type): Add PPC440. + * config/rs6000/rs6000.c (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE, + TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD, + TARGET_SCHED_VARIABLE_ISSUE): Define. + (rs6000_use_dfa_pipeline_interface): New function. + (rs6000_multipass_dfa_lookahead): New Function. + (rs6000_variable_issue): New function. + (rs6000_adjust_cost): Add CMP and DELAYED_CR types. + (rs6000_issue_rate): Add PPC440. + * config/rs6000/rs6000.md (unspec list): Correct typo. + (attr "type"): Add load_ext, load_ext_u, load_ext_ux, load_u, + store_ux, store_u, fpload_ux, fpload_u, fpstore_ux, fpstore_u, + cmp, delayed_cr, mfcr, mtcr. 
+ (automata_option): Set "ndfa". + (extendMMNN2): Update attributes. + (movcc_internal1): Discourage move to non-cr0. Update + attributes. + (movMM_update): Update attributes. + (cmpMM_internal): Update attributes. + (sCC CR materialization): Update attributes. + (branch patterns): Do not discourage non-cr0. + (cr logical patterns): Prefer destructive register allocation. + Update attributes. + (movesi_from_cr): Update attribute. + (mtcrf_operation): Update attribute. + (mtcrfsi): Update attribute. + * config/rs6000/40x.md: New file. + * config/rs6000/603.md: New file. + * config/rs6000/6xx.md: New file. + * config/rs6000/7450.md: New file. + * config/rs6000/7xx.md: New file. + * config/rs6000/mpc.md: New file. + * config/rs6000/power4.md: New file. + * config/rs6000/rios1.md: New file. + * config/rs6000/rios2.md: New file. + * config/rs6000/rs64.md: New file. + [Some DFA descriptions based on work by Michael Hayes] + 2003-02-15 Richard Henderson * cfgcleanup.c: Include params.h. diff --git a/gcc/config/rs6000/40x.md b/gcc/config/rs6000/40x.md new file mode 100644 index 0000000..e44a011 --- /dev/null +++ b/gcc/config/rs6000/40x.md @@ -0,0 +1,67 @@ +(define_automaton "ppc40x") +(define_cpu_unit "iu_40x,bpu_40x" "ppc40x") + +;; PPC401 / PPC403 / PPC405 32-bit integer only IU BPU +;; Embedded PowerPC controller +;; In-order execution +;; Max issue two insns/cycle (includes one branch) +(define_insn_reservation "ppc403-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-store" 1 + (and (eq_attr "type" "store,store_ux,store_u") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-compare" 3 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc403,ppc405")) + 
"iu_40x,nothing,bpu_40x") + +(define_insn_reservation "ppc403-imul" 4 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "ppc403")) + "iu_40x*4") + +(define_insn_reservation "ppc405-imul" 5 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc405")) + "iu_40x*4") + +(define_insn_reservation "ppc405-imul2" 3 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "ppc405")) + "iu_40x*2") + +(define_insn_reservation "ppc405-imul3" 2 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-idiv" 33 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x*33") + +(define_insn_reservation "ppc403-mfcr" 2 + (and (eq_attr "type" "mfcr,mtcr") + (eq_attr "cpu" "ppc403,ppc405")) + "iu_40x") + +(define_insn_reservation "ppc403-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc403,ppc405")) + "bpu_40x") + +(define_insn_reservation "ppc403-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr") + (eq_attr "cpu" "ppc403,ppc405")) + "bpu_40x") + diff --git a/gcc/config/rs6000/603.md b/gcc/config/rs6000/603.md new file mode 100644 index 0000000..37d95d3 --- /dev/null +++ b/gcc/config/rs6000/603.md @@ -0,0 +1,107 @@ +(define_automaton "ppc603,ppc603fp,ppc603other") +(define_cpu_unit "iu_603" "ppc603") +(define_cpu_unit "fpu_603" "ppc603fp") +(define_cpu_unit "lsu_603,bpu_603,sru_603" "ppc603other") + +;; PPC603/PPC603e 32-bit IU, LSU, FPU, BPU, SRU +;; Max issue 3 insns/clock cycle (includes 1 branch) + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 2 insns per cycle. + +;; The PPC603e user's manual recommends that to reduce branch mispredictions, +;; the insn that sets CR bits should be separated from the branch insn +;; that evaluates them; separation by more than 9 insns ensures that the CR +;; bits will be immediately available for execution. 
+;; This could be artificially achieved by exaggerating the latency of +;; compare insns but at the expense of a poorer schedule. + +;; CR insns get executed in the SRU. Not modelled. + +(define_insn_reservation "ppc603-load" 2 + (and (eq_attr "type" "load,load_ext,load_ux,load_u") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-store" 1 + (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-fpload" 2 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "ppc603")) + "lsu_603") + +(define_insn_reservation "ppc603-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "ppc603")) + "iu_603") + +; This takes 2 or 3 cycles +(define_insn_reservation "ppc603-imul" 3 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc603")) + "iu_603*2") + +(define_insn_reservation "ppc603-imul2" 2 + (and (eq_attr "type" "imul2,imul3") + (eq_attr "cpu" "ppc603")) + "iu_603*2") + +(define_insn_reservation "ppc603-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc603")) + "iu_603*37") + +(define_insn_reservation "ppc603-compare" 3 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc603")) + "iu_603,nothing,bpu_603") + +(define_insn_reservation "ppc603-fpcompare" 3 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc603")) + "(fpu_603+iu_603*2),bpu_603") + +(define_insn_reservation "ppc603-fp" 3 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppc603")) + "fpu_603") + +(define_insn_reservation "ppc603-dmul" 4 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc603")) + "fpu_603*2") + +; Divides are not pipelined +(define_insn_reservation "ppc603-sdiv" 18 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc603")) + "fpu_603*18") + +(define_insn_reservation "ppc603-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc603")) + "fpu_603*33") + +(define_insn_reservation "ppc603-mtcr" 2 + (and (eq_attr 
"type" "mtcr") + (eq_attr "cpu" "ppc603")) + "sru_603") + +(define_insn_reservation "ppc603-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr,mfcr") + (eq_attr "cpu" "ppc603")) + "sru_603") + +(define_insn_reservation "ppc603-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc603")) + "bpu_603") + +(define_insn_reservation "ppc603-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc603")) + "bpu_603") + diff --git a/gcc/config/rs6000/6xx.md b/gcc/config/rs6000/6xx.md new file mode 100644 index 0000000..b0b4490 --- /dev/null +++ b/gcc/config/rs6000/6xx.md @@ -0,0 +1,203 @@ +(define_automaton "ppc6xx,ppc6xxfp,ppc6xxfp2,ppc6xxother") +(define_cpu_unit "iu1_6xx,iu2_6xx,mciu_6xx" "ppc6xx") +(define_cpu_unit "fpu_6xx" "ppc6xxfp") +(define_cpu_unit "fpu1_6xx,fpu2_6xx" "ppc6xxfp2") +(define_cpu_unit "lsu_6xx,bpu_6xx,cru_6xx" "ppc6xxother") + +;; PPC604 32-bit 2xSCIU, MCIU, LSU, FPU, BPU +;; PPC604e 32-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU +;; MCIU used for imul/idiv and moves from/to spr +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; Max issue 4 insns/clock cycle + +;; PPC604e is PPC604 with larger caches and a CRU. In the 604 +;; the CR logical operations are handled in the BPU. +;; In the 604e, the CRU shares bus with BPU so only one condition +;; register or branch insn can be issued per clock. Not modelled. + +;; PPC620 64-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU +;; PPC630 64-bit 2xSCIU, MCIU, LSU, 2xFPU, BPU, CRU +;; Max issue 4 insns/clock cycle +;; Out-of-order execution, in-order completion + +;; No following instruction can dispatch in the same cycle as a branch +;; instruction. Not modelled. This is no problem if RCSP is not +;; enabled since the scheduler stops a schedule when it gets to a branch. + +;; Four insns can be dispatched per cycle. 
+ +(define_insn_reservation "ppc604-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-fpload" 3 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-store" 1 + (and (eq_attr "type" "store,fpstore,store_ux,store_u,fpstore_ux,fpstore_u") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "lsu_6xx") + +(define_insn_reservation "ppc604-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "iu1_6xx|iu2_6xx") + +(define_insn_reservation "ppc604-imul" 4 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "ppc604")) + "mciu_6xx*2") + +(define_insn_reservation "ppc604e-imul" 2 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "ppc604e")) + "mciu_6xx") + +(define_insn_reservation "ppc620-imul" 5 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-imul2" 4 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-imul3" 3 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*3") + +(define_insn_reservation "ppc620-lmul" 7 + (and (eq_attr "type" "lmul") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*5") + +(define_insn_reservation "ppc604-idiv" 20 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc604,ppc604e")) + "mciu_6xx*19") + +(define_insn_reservation "ppc620-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc620")) + "mciu_6xx*36") + +(define_insn_reservation "ppc630-idiv" 21 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc630")) + "mciu_6xx*20") + +(define_insn_reservation "ppc620-ldiv" 37 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "ppc620,ppc630")) + "mciu_6xx*36") + +(define_insn_reservation "ppc604-compare" 3 
+ (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "(iu1_6xx|iu2_6xx)") + +; FPU PPC604{,e},PPC620 +(define_insn_reservation "ppc604-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +(define_insn_reservation "ppc604-fp" 3 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +(define_insn_reservation "ppc604-dmul" 3 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx") + +; Divides are not pipelined +(define_insn_reservation "ppc604-sdiv" 18 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx*18") + +(define_insn_reservation "ppc604-ddiv" 32 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) + "fpu_6xx*32") + +(define_insn_reservation "ppc620-ssqrt" 31 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppc620")) + "fpu_6xx*31") + +(define_insn_reservation "ppc620-dsqrt" 31 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppc620")) + "fpu_6xx*31") + + +; 2xFPU PPC630 +(define_insn_reservation "ppc630-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc630")) + "(fpu1_6xx|fpu2_6xx)") + +(define_insn_reservation "ppc630-fp" 3 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx|fpu2_6xx") + +(define_insn_reservation "ppc630-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*17|fpu2_6xx*17") + +(define_insn_reservation "ppc630-ddiv" 21 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*21|fpu2_6xx*21") + +(define_insn_reservation "ppc630-ssqrt" 18 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*18|fpu2_6xx*18") + +(define_insn_reservation "ppc630-dsqrt" 25 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "ppc630")) + "fpu1_6xx*25|fpu2_6xx*25") + +(define_insn_reservation "ppc604-mfcr" 3 + (and (eq_attr "type" "mfcr") + (eq_attr 
"cpu" "ppc604,ppc604e,ppc620,ppc630")) + "mciu_6xx") + +(define_insn_reservation "ppc604-mtcr" 2 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "mciu_6xx") + +(define_insn_reservation "ppc604-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc604")) + "bpu_6xx") + +(define_insn_reservation "ppc604e-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc604e,ppc620,ppc630")) + "cru_6xx") + +(define_insn_reservation "ppc604-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "bpu_6xx") + +(define_insn_reservation "ppc604-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) + "bpu_6xx") + diff --git a/gcc/config/rs6000/7450.md b/gcc/config/rs6000/7450.md new file mode 100644 index 0000000..12bcbd4 --- /dev/null +++ b/gcc/config/rs6000/7450.md @@ -0,0 +1,137 @@ +(define_automaton "ppc7450,ppc7450fp,ppc7450other,ppc7450vec") +(define_cpu_unit "iu1_7450,iu2_7450,iu3_7450,mciu_7450" "ppc7450") +(define_cpu_unit "fpu_7450" "ppc7450fp") +(define_cpu_unit "lsu_7450,bpu_7450" "ppc7450other") +(define_cpu_unit "du1_7450,du2_7450,du3_7450" "ppc7450") +(define_cpu_unit "vecsmpl_7450,veccmplx_7450,vecflt_7450,vecperm_7450" "ppc7450vec") +(define_cpu_unit "vdu1_7450,vdu2_7450" "ppc7450vec") + + +;; PPC7450 32-bit 3xIU, MCIU, LSU, SRU, FPU, BPU, 4xVEC +;; IU1,IU2,IU3 can perform all integer operations +;; MCIU performs imul and idiv, cr logical, SPR moves +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; It also has 4 vector units, one for each type of vector instruction. +;; However, we can only dispatch 2 instructions per cycle. +;; Max issue 3 insns/clock cycle (includes 1 branch) +;; In-order execution + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 3 insns per cycle. 
+(define_reservation "ppc7450_du" "du1_7450|du2_7450|du3_7450") +(define_reservation "ppc7450_vec_du" "vdu1_7450|vdu2_7450") + +(define_insn_reservation "ppc7450-load" 3 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\ + load_ux,load_u,vecload") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-store" 3 + (and (eq_attr "type" "store,store_ux,store_u,vecstore") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-fpload" 4 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450") + +(define_insn_reservation "ppc7450-fpstore" 3 + (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,lsu_7450*3") + +(define_insn_reservation "ppc7450-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,(iu1_7450|iu2_7450|iu3_7450)") + +(define_insn_reservation "ppc7450-imul" 4 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450*2") + +(define_insn_reservation "ppc7450-imul2" 3 + (and (eq_attr "type" "imul2,imul3") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-idiv" 23 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450*23") + +(define_insn_reservation "ppc7450-compare" 2 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,(iu1_7450|iu2_7450|iu3_7450)") + +(define_insn_reservation "ppc7450-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450") + +(define_insn_reservation "ppc7450-fp" 5 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450") + +; Divides are not pipelined +(define_insn_reservation "ppc7450-sdiv" 21 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450*21") + +(define_insn_reservation "ppc7450-ddiv" 
35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,fpu_7450*35") + +(define_insn_reservation "ppc7450-mfcr" 2 + (and (eq_attr "type" "mfcr,mtcr") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-crlogical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,mciu_7450") + +(define_insn_reservation "ppc7450-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc7450")) + "nothing,mciu_7450*2") + +(define_insn_reservation "ppc7450-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc7450")) + "nothing,bpu_7450") + +;; Altivec +(define_insn_reservation "ppc7450-vecsimple" 1 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecsmpl_7450") + +(define_insn_reservation "ppc7450-veccomplex" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,veccmplx_7450") + +(define_insn_reservation "ppc7450-veccmp" 2 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,veccmplx_7450") + +(define_insn_reservation "ppc7450-vecfloat" 4 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecflt_7450") + +(define_insn_reservation "ppc7450-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc7450")) + "ppc7450_du,ppc7450_vec_du,vecperm_7450") + diff --git a/gcc/config/rs6000/7xx.md b/gcc/config/rs6000/7xx.md new file mode 100644 index 0000000..ba64bcf --- /dev/null +++ b/gcc/config/rs6000/7xx.md @@ -0,0 +1,142 @@ +(define_automaton "ppc7xx,ppc7xxfp,ppc7xxother,ppc7xxvec") +(define_cpu_unit "iu1_7xx,iu2_7xx" "ppc7xx") +(define_cpu_unit "fpu_7xx" "ppc7xxfp") +(define_cpu_unit "lsu_7xx,bpu_7xx,sru_7xx" "ppc7xxother") +(define_cpu_unit "du1_7xx,du2_7xx" "ppc7xx") +(define_cpu_unit "veccmplx_7xx,vecperm_7xx,vdu_7xx" "ppc7xxvec") + +;; PPC740/PPC750/PPC7400 32-bit 2xIU, LSU, SRU, FPU, 
BPU +;; IU1 can perform all integer operations +;; IU2 can perform all integer operations except imul and idiv +;; LSU 2 stage pipelined +;; FPU 3 stage pipelined +;; Max issue 3 insns/clock cycle (includes 1 branch) +;; In-order execution + + +;; The PPC750 user's manual recommends that to reduce branch mispredictions, +;; the insn that sets CR bits should be separated from the branch insn +;; that evaluates them. There is no advantage to having more than 10 cycles +;; of separation. +;; This could be artificially achieved by exaggerating the latency of +;; compare insns but at the expense of a poorer schedule. + +;; Branches go straight to the BPU. All other insns are handled +;; by a dispatch unit which can issue a max of 2 insns per cycle. +(define_reservation "ppc750_du" "du1_7xx|du2_7xx") +(define_reservation "ppc7400_vec_du" "vdu_7xx") + +(define_insn_reservation "ppc750-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\ + load_ux,load_u,fpload,fpload_ux,fpload_u,vecload") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,lsu_7xx") + +(define_insn_reservation "ppc750-store" 1 + (and (eq_attr "type" "store,store_ux,store_u,\ + fpstore,fpstore_ux,fpstore_u,vecstore") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,lsu_7xx") + +(define_insn_reservation "ppc750-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,(iu1_7xx|iu2_7xx)") + +(define_insn_reservation "ppc750-imul" 4 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*4") + +(define_insn_reservation "ppc750-imul2" 3 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*2") + +(define_insn_reservation "ppc750-imul3" 2 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx") + +(define_insn_reservation "ppc750-idiv" 19 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx*19") + +(define_insn_reservation 
"ppc750-compare" 2 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,(iu1_7xx|iu2_7xx)") + +(define_insn_reservation "ppc750-fpcompare" 2 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx") + +(define_insn_reservation "ppc750-fp" 3 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx") + +(define_insn_reservation "ppc750-dmul" 4 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc750")) + "ppc750_du,fpu_7xx*2") + +(define_insn_reservation "ppc7400-dmul" 3 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,fpu_7xx") + +; Divides are not pipelined +(define_insn_reservation "ppc750-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx*17") + +(define_insn_reservation "ppc750-ddiv" 31 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,fpu_7xx*31") + +(define_insn_reservation "ppc750-mfcr" 2 + (and (eq_attr "type" "mfcr,mtcr") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,iu1_7xx") + +(define_insn_reservation "ppc750-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppc750,ppc7400")) + "ppc750_du,sru_7xx*2") + +(define_insn_reservation "ppc750-mtjmpr" 2 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,sru_7xx*2") + +(define_insn_reservation "ppc750-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "ppc750,ppc7400")) + "nothing,bpu_7xx") + +;; Altivec +(define_insn_reservation "ppc7400-vecsimple" 1 + (and (eq_attr "type" "vecsimple,veccmp") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-veccomplex" 4 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-vecfloat" 4 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" 
"ppc7400")) + "ppc750_du,ppc7400_vec_du,veccmplx_7xx") + +(define_insn_reservation "ppc7400-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "ppc7400")) + "ppc750_du,ppc7400_vec_du,vecperm_7xx") + diff --git a/gcc/config/rs6000/mpc.md b/gcc/config/rs6000/mpc.md new file mode 100644 index 0000000..b944e8e --- /dev/null +++ b/gcc/config/rs6000/mpc.md @@ -0,0 +1,79 @@ +(define_automaton "mpc,mpcfp") +(define_cpu_unit "iu_mpc,mciu_mpc" "mpc") +(define_cpu_unit "fpu_mpc" "mpcfp") +(define_cpu_unit "lsu_mpc,bpu_mpc" "mpc") + +;; MPCCORE 32-bit SCIU, MCIU, LSU, FPU, BPU +;; 505/801/821/823 + +(define_insn_reservation "mpccore-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-store" 1 + (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-fpload" 2 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "mpccore")) + "lsu_mpc") + +(define_insn_reservation "mpccore-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "mpccore")) + "iu_mpc") + +(define_insn_reservation "mpccore-imul" 2 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "mpccore")) + "mciu_mpc") + +; Divide latency varies greatly from 2-11, use 6 as average +(define_insn_reservation "mpccore-idiv" 6 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "mpccore")) + "mciu_mpc*6") + +(define_insn_reservation "mpccore-compare" 3 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "mpccore")) + "iu_mpc,nothing,bpu_mpc") + +(define_insn_reservation "mpccore-fpcompare" 2 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "mpccore")) + "fpu_mpc,bpu_mpc") + +(define_insn_reservation "mpccore-fp" 4 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*2") + +(define_insn_reservation "mpccore-dmul" 5 + (and (eq_attr "type" "dmul") + 
(eq_attr "cpu" "mpccore")) + "fpu_mpc*5") + +(define_insn_reservation "mpccore-sdiv" 10 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*10") + +(define_insn_reservation "mpccore-ddiv" 17 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "mpccore")) + "fpu_mpc*17") + +(define_insn_reservation "mpccore-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "mpccore")) + "bpu_mpc") + +(define_insn_reservation "mpccore-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr,mfcr,mtcr") + (eq_attr "cpu" "mpccore")) + "bpu_mpc") + diff --git a/gcc/config/rs6000/power4.md b/gcc/config/rs6000/power4.md new file mode 100644 index 0000000..0f97b31 --- /dev/null +++ b/gcc/config/rs6000/power4.md @@ -0,0 +1,281 @@ +;; POWER4 model + +;; Sources: IBM Red Book and White Paper on POWER4 + +;; The POWER4 has 2 iu, 2 fpu, 2 lsu per engine (2 engines per chip). +;; Instructions that update more than one register get broken into two +;; (split) or more internal ops. The chip can issue up to 5 +;; internal ops per cycle. 
+ +(define_automaton "power4iu,power4lsu,power4fpu,power4misc,power4vec,power4disp") + +(define_cpu_unit "iu1_power4,iu2_power4" "power4iu") +(define_cpu_unit "lsu1_power4,lsu2_power4" "power4lsu") +(define_cpu_unit "fpu1_power4,fpu2_power4" "power4fpu") +(define_cpu_unit "bpu_power4,cru_power4" "power4misc") +(define_cpu_unit "vec_power4,vecperm_power4" "power4vec") +(define_cpu_unit "du1_power4,du2_power4,du3_power4,du4_power4,du5_power4" + "power4disp") + +(define_reservation "q1_power4" "du1_power4|du4_power4") +(define_reservation "q2_power4" "du2_power4|du3_power4") + +(define_reservation "lsq_power4" "((du1_power4|du4_power4),lsu1_power4)\ + |((du2_power4|du3_power4),lsu2_power4)") + +(define_reservation "lsuq_power4" + "((du1_power4+du2_power4),lsu1_power4+iu2_power4)\ + |((du2_power4+du3_power4),lsu2_power4+iu2_power4)\ + |((du3_power4+du4_power4),lsu2_power4+iu1_power4)") +;;; |((du2_power4+du3_power4),lsu2_power4,iu2_power4) + +(define_reservation "lsuxq_power4" + "(du1_power4+du2_power4+du3_power4+du4_power4),\ + iu1_power4,(lsu2_power4+iu2_power4)") + +(define_reservation "iq_power4" "((du1_power4|du4_power4),iu1_power4)\ + |((du2_power4|du3_power4),iu2_power4)") + +(define_reservation "fpq_power4" "((du1_power4|du4_power4),fpu1_power4)\ + |((du2_power4|du3_power4),fpu2_power4)") + +(define_reservation "vq_power4" + "(du1_power4|du2_power4|du3_power4|du4_power4),vec_power4") +(define_reservation "vpq_power4" + "(du1_power4|du2_power4|du3_power4|du4_power4),\ + vecperm_power4") + + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du1_power4" "du2_power4,du3_power4,du4_power4,du5_power4") +(absence_set "du2_power4" "du3_power4,du4_power4,du5_power4") +(absence_set "du3_power4" "du4_power4,du5_power4") +(absence_set "du4_power4" "du5_power4") + + +; Load/store +(define_insn_reservation "power4-load" 3 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-load-ext" 5 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4),lsu1_power4,nothing,nothing,iu2_power4)\ + |((du2_power4+du3_power4),lsu2_power4,nothing,nothing,iu2_power4)\ + |((du3_power4+du4_power4),lsu2_power4,nothing,nothing,iu1_power4)") + +(define_insn_reservation "power4-load-ext-update" 5 + (and (eq_attr "type" "load_ext_u") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4+du3_power4+du4_power4),\ + (lsu1_power4+iu2_power4),nothing,nothing,iu2_power4") + +(define_insn_reservation "power4-load-ext-update-indexed" 5 + (and (eq_attr "type" "load_ext_ux") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4+du3_power4+du4_power4),\ + iu1_power4,(lsu2_power4+iu1_power4),nothing,nothing,iu2_power4") + +(define_insn_reservation "power4-load-update-indexed" 3 + (and (eq_attr "type" "load_ux") + (eq_attr "cpu" "power4")) + "lsuxq_power4") + +(define_insn_reservation "power4-load-update" 3 + (and (eq_attr "type" "load_u") + (eq_attr "cpu" "power4")) + "lsuq_power4") + +(define_insn_reservation "power4-fpload" 5 + (and (eq_attr "type" "fpload") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-fpload-update" 5 + (and (eq_attr "type" "fpload_u") + (eq_attr "cpu" "power4")) + "lsuq_power4") + +(define_insn_reservation "power4-fpload-update-indexed" 5 + (and (eq_attr "type" "fpload_ux") + (eq_attr "cpu" "power4")) + "lsuxq_power4") + +(define_insn_reservation "power4-vecload" 5 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-store" 1 + 
(and (eq_attr "type" "store,vecstore") + (eq_attr "cpu" "power4")) + "lsq_power4") + +(define_insn_reservation "power4-store-update" 1 + (and (eq_attr "type" "store_u") + (eq_attr "cpu" "power4")) + "lsuq_power4") + +(define_insn_reservation "power4-store-update-indexed" 1 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power4")) + "lsuxq_power4") + +(define_insn_reservation "power4-fpstore" 1 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power4")) + "(du1_power4,fpu1_power4,lsu1_power4)\ + |(du2_power4,fpu2_power4,lsu2_power4)\ + |(du3_power4,fpu2_power4,lsu2_power4)\ + |(du4_power4,fpu1_power4,lsu1_power4)") + +(define_insn_reservation "power4-fpstore-update" 1 + (and (eq_attr "type" "fpstore_u") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4),(fpu1_power4+iu2_power4),lsu1_power4)\ + |((du2_power4+du3_power4),(fpu2_power4+iu2_power4),lsu2_power4)\ + |((du3_power4+du4_power4),(fpu2_power4+iu1_power4),lsu2_power4)") +;;;((du2_power4+du3_power4),fpu2_power4,(iu2_power4+lsu2_power4)) + +(define_insn_reservation "power4-fpstore-update-indexed" 1 + (and (eq_attr "type" "fpstore_ux") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4+du3_power4+du4_power4), + iu1_power4,fpu2_power4,(iu2_power4+lsu2_power4)") + + +; Integer latency is 2 cycles +(define_insn_reservation "power4-integer" 2 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "power4")) + "iq_power4") + +(define_insn_reservation "power4-cmp" 3 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power4")) + "iq_power4") + +(define_insn_reservation "power4-compare" 3 + (and (eq_attr "type" "compare,delayed_compare") + (eq_attr "cpu" "power4")) + "((du1_power4+du2_power4),iu1_power4,iu2_power4)\ + |((du2_power4+du3_power4),iu2_power4,iu2_power4)\ + |((du3_power4+du4_power4),iu2_power4,iu1_power4)") + +(define_insn_reservation "power4-imul" 7 + (and (eq_attr "type" "imul,lmul") + (eq_attr "cpu" "power4")) + "(q1_power4,iu1_power4*6)|(q2_power4,iu2_power4*6)") + +(define_insn_reservation 
"power4-imul2" 5 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "power4")) + "(q1_power4,iu1_power4*4)|(q2_power4,iu2_power4*4)") + +(define_insn_reservation "power4-imul3" 4 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "power4")) + "(q1_power4,iu1_power4*3)|(q2_power4,iu2_power4*3)") + +; SPR move only executes in first IU. +; Integer division only executes in second IU. +(define_insn_reservation "power4-idiv" 36 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4),iu2_power4*35") + +(define_insn_reservation "power4-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4),iu2_power4*67") + + +(define_insn_reservation "power4-mtjmpr" 3 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power4")) + "du1_power4,bpu_power4") + + +; Branches take dispatch Slot 4. The presence_sets prevent other insn from +; grabbing previous dispatch slots once this is assigned. +(define_insn_reservation "power4-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power4")) + "du5_power4,bpu_power4") + + +; Condition Register logical ops are split if non-destructive (RT != RB) +(define_insn_reservation "power4-crlogical" 2 + (and (eq_attr "type" "cr_logical") + (eq_attr "cpu" "power4")) + "du1_power4,cru_power4") + +(define_insn_reservation "power4-delayedcr" 4 + (and (eq_attr "type" "delayed_cr") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4),cru_power4,cru_power4") + +; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu +(define_insn_reservation "power4-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power4")) + "(du1_power4+du2_power4+du3_power4+du4_power4),\ + (du1_power4+du2_power4+du3_power4+du4_power4+cru_power4),\ + cru_power4,cru_power4,cru_power4") + +; mtcrf (1 field) +(define_insn_reservation "power4-mtcr" 4 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power4")) + "du1_power4,iu1_power4") + +; Basic FP latency is 6 cycles +(define_insn_reservation "power4-fp" 6 + (and (eq_attr 
"type" "fp,dmul") + (eq_attr "cpu" "power4")) + "fpq_power4") + +(define_insn_reservation "power4-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power4")) + "fpq_power4") + +(define_insn_reservation "power4-sdiv" 33 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "power4")) + "(q1_power4,fpu1_power4*28)|(q2_power4,fpu2_power4*28)") + +(define_insn_reservation "power4-sqrt" 40 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "power4")) + "(q1_power4,fpu1_power4*35)|(q2_power4,fpu2_power4*35)") + + +; VMX +(define_insn_reservation "power4-vec" 2 + (and (eq_attr "type" "vecsimple,veccomplex") + (eq_attr "cpu" "power4")) + "vq_power4") + +; vecfp compare +(define_insn_reservation "power4-veccmp" 8 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "power4")) + "vq_power4") + +(define_insn_reservation "power4-vecfloat" 8 + (and (eq_attr "type" "vecfloat") + (eq_attr "cpu" "power4")) + "vq_power4") + +(define_insn_reservation "power4-vecperm" 2 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power4")) + "vpq_power4") + +(define_bypass 4 "power4-vecload" "power4-vecperm") +(define_bypass 5 "power4-vec" "power4-branch,power4-crlogical") +(define_bypass 3 "power4-vec,power4-vecfloat" "power4-vecperm") +(define_bypass 3 "power4-vecperm" "power4-vec,power4-vecfloat") diff --git a/gcc/config/rs6000/rios1.md b/gcc/config/rs6000/rios1.md new file mode 100644 index 0000000..d3c2e0e --- /dev/null +++ b/gcc/config/rs6000/rios1.md @@ -0,0 +1,144 @@ +(define_automaton "rios1,rios1fp") +(define_cpu_unit "iu_rios1" "rios1") +(define_cpu_unit "fpu_rios1" "rios1fp") +(define_cpu_unit "bpu_rios1" "rios1") + +;; RIOS1 32-bit IU, FPU, BPU + +(define_insn_reservation "rios1-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "rios1,ppc601")) + "iu_rios1") + +(define_insn_reservation "rios1-store" 1 + (and (eq_attr "type" "store,store_ux,store_u") + (eq_attr "cpu" "rios1,ppc601")) + "iu_rios1") + 
+(define_insn_reservation "rios1-fpload" 2 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "rios1")) + "iu_rios1") + +(define_insn_reservation "ppc601-fpload" 3 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "ppc601")) + "iu_rios1") + +(define_insn_reservation "rios1-fpstore" 1 + (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "rios1,ppc601")) + "iu_rios1+fpu_rios1") + +(define_insn_reservation "rios1-integer" 1 + (and (eq_attr "type" "integer,mfcr,mtcr") + (eq_attr "cpu" "rios1,ppc601")) + "iu_rios1") + +(define_insn_reservation "rios1-imul" 5 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "rios1")) + "iu_rios1*5") + +(define_insn_reservation "rios1-imul2" 4 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "rios1")) + "iu_rios1*4") + +(define_insn_reservation "rios1-imul3" 3 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "rios1")) + "iu_rios1*3") + +(define_insn_reservation "ppc601-imul" 5 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "ppc601")) + "iu_rios1*5") + +(define_insn_reservation "rios1-idiv" 19 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "rios1")) + "iu_rios1*19") + +(define_insn_reservation "ppc601-idiv" 36 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppc601")) + "iu_rios1*36") + +; compare executes on integer unit, but feeds insns which +; execute on the branch unit.
+(define_insn_reservation "rios1-compare" 4 + (and (eq_attr "type" "cmp,compare") + (eq_attr "cpu" "rios1")) + "iu_rios1,nothing*2,bpu_rios1") + +(define_insn_reservation "rios1-delayed_compare" 5 + (and (eq_attr "type" "delayed_compare") + (eq_attr "cpu" "rios1")) + "iu_rios1,nothing*3,bpu_rios1") + +(define_insn_reservation "ppc601-compare" 3 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "ppc601")) + "iu_rios1,nothing,bpu_rios1") + +(define_insn_reservation "rios1-fpcompare" 9 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "rios1")) + "fpu_rios1,nothing*3,bpu_rios1") + +(define_insn_reservation "ppc601-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "ppc601")) + "(fpu_rios1+iu_rios1*2),nothing*2,bpu_rios1") + +(define_insn_reservation "rios1-fp" 2 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "rios1")) + "fpu_rios1") + +(define_insn_reservation "ppc601-fp" 4 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "ppc601")) + "fpu_rios1") + +(define_insn_reservation "ppc601-dmul" 5 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "ppc601")) + "fpu_rios1*2") + +(define_insn_reservation "rios1-sdiv" 19 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "rios1")) + "fpu_rios1*19") + +(define_insn_reservation "ppc601-sdiv" 17 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppc601")) + "fpu_rios1*17") + +(define_insn_reservation "ppc601-ddiv" 31 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppc601")) + "fpu_rios1*31") + +(define_insn_reservation "rios1-crlogical" 4 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "rios1,ppc601")) + "bpu_rios1") + +(define_insn_reservation "rios1-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "rios1")) + "bpu_rios1") + +(define_insn_reservation "ppc601-mtjmpr" 4 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "ppc601")) + "bpu_rios1") + +(define_insn_reservation "rios1-branch" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "rios1,ppc601")) +
"bpu_rios1") + diff --git a/gcc/config/rs6000/rios2.md b/gcc/config/rs6000/rios2.md new file mode 100644 index 0000000..af96b87 --- /dev/null +++ b/gcc/config/rs6000/rios2.md @@ -0,0 +1,82 @@ +(define_automaton "rios2,rios2fp") +(define_cpu_unit "iu1_rios2,iu2_rios2" "rios2") +(define_cpu_unit "fpu1_rios2,fpu2_rios2" "rios2fp") +(define_cpu_unit "bpu_rios2" "rios2") + +;; RIOS2 32-bit 2xIU, 2xFPU, BPU +;; IU1 can perform all integer operations +;; IU2 can perform all integer operations except imul and idiv + +(define_insn_reservation "rios2-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\ + load_ux,load_u,fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "rios2")) + "iu1_rios2|iu2_rios2") + +(define_insn_reservation "rios2-store" 1 + (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "rios2")) + "iu1_rios2|iu2_rios2") + +(define_insn_reservation "rios2-integer" 1 + (and (eq_attr "type" "integer,mfcr,mtcr") + (eq_attr "cpu" "rios2")) + "iu1_rios2|iu2_rios2") + +(define_insn_reservation "rios2-imul" 2 + (and (eq_attr "type" "imul,imul2,imul3") + (eq_attr "cpu" "rios2")) + "iu1_rios2*2") + +(define_insn_reservation "rios2-idiv" 13 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "rios2")) + "iu1_rios2*13") + +; compare executes on integer unit, but feeds insns which +; execute on the branch unit. 
+(define_insn_reservation "rios2-compare" 3 + (and (eq_attr "type" "cmp,compare,delayed_compare") + (eq_attr "cpu" "rios2")) + "(iu1_rios2|iu2_rios2),nothing,bpu_rios2") + +(define_insn_reservation "rios2-fp" 2 + (and (eq_attr "type" "fp") + (eq_attr "cpu" "rios2")) + "fpu1_rios2|fpu2_rios2") + +(define_insn_reservation "rios2-fpcompare" 5 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "rios2")) + "(fpu1_rios2|fpu2_rios2),nothing*3,bpu_rios2") + +(define_insn_reservation "rios2-dmul" 2 + (and (eq_attr "type" "dmul") + (eq_attr "cpu" "rios2")) + "fpu1_rios2|fpu2_rios2") + +(define_insn_reservation "rios2-sdiv" 17 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "rios2")) + "(fpu1_rios2*17)|(fpu2_rios2*17)") + +(define_insn_reservation "rios2-ssqrt" 26 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "rios2")) + "(fpu1_rios2*26)|(fpu2_rios2*26)") + +(define_insn_reservation "rios2-crlogical" 4 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "rios2")) + "bpu_rios2") + +(define_insn_reservation "rios2-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "rios2")) + "bpu_rios2") + +(define_insn_reservation "rios2-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "rios2")) + "bpu_rios2") + diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index edc0e36..240f2d5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -227,6 +227,9 @@ static unsigned int rs6000_xcoff_section_type_flags PARAMS ((tree, const char *, static void rs6000_xcoff_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED; static bool rs6000_binds_local_p PARAMS ((tree)); +static int rs6000_use_dfa_pipeline_interface PARAMS ((void)); +static int rs6000_multipass_dfa_lookahead PARAMS ((void)); +static int rs6000_variable_issue PARAMS ((FILE *, int, rtx, int)); static bool rs6000_rtx_costs PARAMS ((rtx, int, int, int *)); static int rs6000_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static int 
rs6000_adjust_priority PARAMS ((rtx, int)); @@ -380,6 +383,13 @@ static const char alt_reg_names[][8] = #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue +#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE +#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE rs6000_use_dfa_pipeline_interface +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_multipass_dfa_lookahead +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue + #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate #undef TARGET_SCHED_ADJUST_COST @@ -12203,6 +12213,57 @@ output_function_profiler (file, labelno) } } + +static int +rs6000_use_dfa_pipeline_interface () +{ + return 1; +} + +static int +rs6000_multipass_dfa_lookahead () +{ + if (rs6000_cpu == PROCESSOR_POWER4) + return 4; + else + return 1; +} + +/* Power4 load update and store update instructions are cracked into a + load or store and an integer insn which are executed in the same cycle. + Branches have their own dispatch slot which does not count against the + GCC issue rate, but it changes the program flow so there are no other + instructions to issue in this cycle. 
*/ + +static int +rs6000_variable_issue (stream, verbose, insn, more) + FILE *stream ATTRIBUTE_UNUSED; + int verbose ATTRIBUTE_UNUSED; + rtx insn; + int more; +{ + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return more; + + if (rs6000_cpu == PROCESSOR_POWER4) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_LOAD_EXT_U || type == TYPE_LOAD_EXT_UX + || type == TYPE_LOAD_UX || type == TYPE_STORE_UX + || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX) + return 0; + else if (type == TYPE_LOAD_U || type == TYPE_STORE_U + || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U + || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR) + return more > 2 ? more - 2 : 0; + else + return more - 1; + } + else + return more - 1; +} + /* Adjust the cost of a scheduling dependency. Return the new cost of a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ @@ -12246,10 +12307,12 @@ rs6000_adjust_cost (insn, link, dep_insn, cost) || rs6000_cpu_attr == CPU_POWER4) && recog_memoized (dep_insn) && (INSN_CODE (dep_insn) >= 0) - && (get_attr_type (dep_insn) == TYPE_COMPARE + && (get_attr_type (dep_insn) == TYPE_CMP + || get_attr_type (dep_insn) == TYPE_COMPARE || get_attr_type (dep_insn) == TYPE_DELAYED_COMPARE || get_attr_type (dep_insn) == TYPE_FPCOMPARE - || get_attr_type (dep_insn) == TYPE_CR_LOGICAL)) + || get_attr_type (dep_insn) == TYPE_CR_LOGICAL + || get_attr_type (dep_insn) == TYPE_DELAYED_CR)) return cost + 2; default: break; @@ -12315,6 +12378,7 @@ rs6000_issue_rate () case CPU_PPC601: /* ?
*/ case CPU_PPC7450: return 3; + case CPU_PPC440: case CPU_PPC603: case CPU_PPC750: case CPU_PPC7400: diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1381ca4..744b6bc 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -338,6 +338,7 @@ enum processor_type PROCESSOR_MPCCORE, PROCESSOR_PPC403, PROCESSOR_PPC405, + PROCESSOR_PPC440, PROCESSOR_PPC601, PROCESSOR_PPC603, PROCESSOR_PPC604, diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b06f01b..76ad8df 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -35,11 +35,11 @@ ;; 15 load_macho_picbase ;; 16 macho_correct_pic ;; 19 movesi_from_cr -;; 20 movesi_to_cr +;; 20 movsi_to_cr ;; Define an insn type attribute. This is used in function unit delay ;; computations. -(define_attr "type" "integer,load,store,fpload,fpstore,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,branch,compare,cr_logical,delayed_compare,fpcompare,mtjmpr,fp,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,vecsimple,veccomplex,veccmp,vecperm,vecfloat" +(define_attr "type" "integer,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,branch,cmp,compare,delayed_compare,fpcompare,cr_logical,delayed_cr,mfcr,mtcr,mtjmpr,fp,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,vecsimple,veccomplex,veccmp,vecperm,vecfloat" (const_string "integer")) ;; Length (in bytes). @@ -58,800 +58,21 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. 
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4" +(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,power4" (const (symbol_ref "rs6000_cpu_attr"))) -; (define_function_unit NAME MULTIPLICITY SIMULTANEITY -; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST]) - -; Load/Store Unit -- pure PowerPC only -; (POWER and 601 use Integer Unit) -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "load") - (eq_attr "cpu" "rs64a,mpccore,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400")) - 2 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "load,vecload") - (eq_attr "cpu" "ppc7450")) - 3 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "rs64a,mpccore,ppc603,ppc604,ppc604e,ppc620,ppc630")) - 1 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "ppc750,ppc7400")) - 2 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "store,vecstore") - (eq_attr "cpu" "ppc7450")) - 3 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "fpstore") - (eq_attr "cpu" "ppc7450")) - 3 3) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "mpccore,ppc603,ppc750,ppc7400")) - 2 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "ppc7450")) - 4 1) - -(define_function_unit "lsu" 1 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "rs64a,ppc604,ppc604e,ppc620,ppc630")) - 3 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "load") - (eq_attr "cpu" "rios1,ppc403,ppc405,ppc601")) - 2 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "rios1,ppc403,ppc405,ppc601")) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fpstore") - (eq_attr "cpu" "rios1,ppc601")) - 0 1) - 
-(define_function_unit "iu" 1 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "rios1")) - 2 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "ppc601")) - 3 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "load,fpload") - (eq_attr "cpu" "rios2")) - 2 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "rios2")) - 1 1) - -; Integer Unit (RIOS1, PPC601, PPC603, RS64a) -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "rios1,rs64a,mpccore,ppc403,ppc405,ppc601,ppc603")) - 1 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "mpccore,ppc403,ppc405,ppc601")) - 1 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "ppc403")) - 4 4) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "ppc405")) - 4 3) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul2,imul3") - (eq_attr "cpu" "ppc405")) - 3 2) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "rios1")) - 5 5) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "rios1")) - 4 4) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "rios1")) - 3 3) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "ppc601,ppc603")) - 5 5) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "rs64a")) - 20 20) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "rs64a")) - 12 12) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "rs64a")) - 8 8) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "lmul") - (eq_attr "cpu" "rs64a")) - 34 34) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "rios1")) - 19 19) - 
-(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "rs64a")) - 66 66) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "ldiv") - (eq_attr "cpu" "rs64a")) - 66 66) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc403")) - 33 33) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc405")) - 35 35) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc601")) - 36 36) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc603")) - 37 36) - -; RIOS2 has two integer units: a primary one which can perform all -; operations and a secondary one which is fed in lock step with the first -; and can perform "simple" integer operations. -; To catch this we define a 'dummy' imuldiv-unit that is also needed -; for the complex insns. -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "rios2")) - 1 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "rios2")) - 2 2) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "rios2")) - 13 13) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "rios2")) - 2 2) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "rios2")) - 13 13) - -; MPCCORE has separate IMUL/IDIV unit for multicycle instructions -; Divide latency varies greatly from 2-11, use 6 as average -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "mpccore")) - 2 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "mpccore")) - 6 6) - -; PPC604{,e} has two units that perform integer operations -; and one unit for divide/multiply operations (and move -; from/to spr). 
-(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630")) - 1 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "ppc604")) - 4 2) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul,imul2,imul3") - (eq_attr "cpu" "ppc604e")) - 2 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "ppc620,ppc630")) - 5 3) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "ppc620,ppc630")) - 4 3) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "ppc620,ppc630")) - 3 3) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "lmul") - (eq_attr "cpu" "ppc620,ppc630")) - 7 5) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc604,ppc604e")) - 20 19) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc620")) - 37 36) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc630")) - 21 20) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "ldiv") - (eq_attr "cpu" "ppc620,ppc630")) - 37 36) - -; PPC7450 has 3 integer units (for most integer insns) and one mul/div -; unit, which also does CR-logical insns and move to/from SPR. -; It also has 4 vector units, one for each type of vector instruction. -; However, we can only dispatch 2 instructions per cycle. -; We model this as saying that dispatching two of the same type of instruction -; in a row incurs a single cycle delay. 
-(define_function_unit "iu3" 3 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "ppc7450")) - 1 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "ppc7450")) - 4 2) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul2,imul3") - (eq_attr "cpu" "ppc7450")) - 3 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc7450")) - 23 23) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "ppc7450")) - 1 1) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecsimple") - (eq_attr "cpu" "ppc7450")) - 1 2 [(eq_attr "type" "vecsimple")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecsimple") - (eq_attr "cpu" "ppc7450")) - 1 1 [(eq_attr "type" "!vecsimple")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "veccomplex") - (eq_attr "cpu" "ppc7450")) - 4 2 [(eq_attr "type" "veccomplex")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "veccomplex") - (eq_attr "cpu" "ppc7450")) - 4 1 [(eq_attr "type" "!veccomplex")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "veccmp") - (eq_attr "cpu" "ppc7450")) - 2 2 [(eq_attr "type" "veccmp")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "veccmp") - (eq_attr "cpu" "ppc7450")) - 2 1 [(eq_attr "type" "!veccmp")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecfloat") - (eq_attr "cpu" "ppc7450")) - 4 2 [(eq_attr "type" "vecfloat")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecfloat") - (eq_attr "cpu" "ppc7450")) - 4 1 [(eq_attr "type" "!vecfloat")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecperm") - (eq_attr "cpu" "ppc7450")) - 2 2 [(eq_attr "type" "vecperm")]) - -(define_function_unit "vec_alu2" 2 0 - (and (eq_attr "type" "vecperm") - (eq_attr "cpu" "ppc7450")) - 2 1 [(eq_attr "type" "!vecperm")]) - -; PPC750 has two integer units: a 
primary one which can perform all -; operations and a secondary one which is fed in lock step with the first -; and can perform "simple" integer operations. -; To catch this we define a 'dummy' imuldiv-unit that is also needed -; for the complex insns. -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "ppc750,ppc7400")) - 1 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "ppc750,ppc7400")) - 4 4) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "ppc750,ppc7400")) - 3 2) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "ppc750,ppc7400")) - 2 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc750,ppc7400")) - 19 19) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul") - (eq_attr "cpu" "ppc750,ppc7400")) - 4 4) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "ppc750,ppc7400")) - 3 2) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "ppc750,ppc7400")) - 2 1) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc750,ppc7400")) - 19 19) - -; CR-logical operations are execute-serialized, that is they don't -; start (and block the function unit) until all preceding operations -; have finished. They don't block dispatch of other insns, though. -; I've imitated this by giving them longer latency. -(define_function_unit "sru" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "ppc603,ppc750,ppc7400")) - 3 2) - -; compare is done on integer unit, but feeds insns which -; execute on the branch unit. 
-(define_function_unit "iu" 1 0 - (and (eq_attr "type" "compare") - (eq_attr "cpu" "rios1")) - 4 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "delayed_compare") - (eq_attr "cpu" "rios1")) - 5 1) - -(define_function_unit "iu" 1 0 - (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc601,ppc603")) - 3 1) - -; some extra cycles added by TARGET_SCHED_ADJUST_COST between compare -; and a following branch, to reduce mispredicts -(define_function_unit "iu3" 3 0 - (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "ppc7450")) - 1 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "rios2")) - 3 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400")) - 1 1) - -; fp compare uses fp unit -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "rios1")) - 9 1) - -; rios1 and rios2 have different fpcompare delays -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "rios2,ppc630")) - 5 1) - -; on ppc601 and ppc603, fpcompare takes also 2 cycles from -; the integer unit -; here we do not define delays, just occupy the unit. The dependencies -; will be assigned by the fpcompare definition in the fpu. 
-(define_function_unit "iu" 1 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "ppc601,ppc603")) - 0 2) - -; fp compare uses fp unit -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "rs64a,ppc601,ppc603,ppc604,ppc604e,ppc620")) - 5 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "ppc750,ppc7400,ppc7450")) - 3 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "mpccore")) - 1 1) - -(define_function_unit "bpu" 1 0 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "rios1,rios2,rs64a")) - 5 1) - -(define_function_unit "bpu" 1 0 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "mpccore,ppc403,ppc405,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630")) - 4 1) - -(define_function_unit "sru" 1 0 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "ppc750,ppc7400")) - 2 2) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "ppc7450")) - 2 2) - -(define_function_unit "bpu" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "rios1,rios2,ppc604")) - 4 1) - -(define_function_unit "cru" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "ppc604e,ppc620,ppc630,rs64a")) - 1 1) - -; all jumps/branches are executing on the bpu, in 1 cycle, for all machines. 
-(define_function_unit "bpu" 1 0 - (eq_attr "type" "jmpreg") - 1 1) - -(define_function_unit "bpu" 1 0 - (eq_attr "type" "branch") - 1 1) - -; Floating Point Unit -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "rios1")) - 2 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fp") - (eq_attr "cpu" "rs64a,mpccore")) - 4 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fp") - (eq_attr "cpu" "ppc601")) - 4 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fp") - (eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620,ppc750,ppc7400")) - 3 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "ppc7450")) - 5 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dmul") - (eq_attr "cpu" "rs64a")) - 7 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dmul") - (eq_attr "cpu" "mpccore")) - 5 5) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dmul") - (eq_attr "cpu" "ppc601")) - 5 2) - -; is this true? 
-(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dmul") - (eq_attr "cpu" "ppc603,ppc750")) - 4 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dmul") - (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc7400")) - 3 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv,ddiv") - (eq_attr "cpu" "rios1")) - 19 19) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "rs64a")) - 31 31) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "ppc601,ppc750,ppc7400")) - 17 17) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "ppc7450")) - 21 21) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "mpccore")) - 10 10) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620")) - 18 18) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "mpccore")) - 17 17) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "rs64a,ppc601,ppc750,ppc604,ppc604e,ppc620,ppc7400")) - 31 31) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "ppc7450")) - 35 35) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "ppc603")) - 33 33) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "ssqrt") - (eq_attr "cpu" "ppc620")) - 31 31) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "type" "dsqrt") - (eq_attr "cpu" "ppc620")) - 31 31) - -; RIOS2 has two symmetric FPUs. 
-(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "rios2")) - 2 1) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "ppc630")) - 3 1) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "sdiv,ddiv") - (eq_attr "cpu" "rios2")) - 17 17) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "ppc630")) - 17 17) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "ppc630")) - 21 21) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "ssqrt,dsqrt") - (eq_attr "cpu" "rios2")) - 26 26) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "ssqrt") - (eq_attr "cpu" "ppc630")) - 18 18) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "dsqrt") - (eq_attr "cpu" "ppc630")) - 26 26) - -;; Power4 -(define_function_unit "lsu2" 2 0 - (and (eq_attr "type" "load") - (eq_attr "cpu" "power4")) - 3 1) - -(define_function_unit "lsu2" 2 0 - (and (eq_attr "type" "fpload") - (eq_attr "cpu" "power4")) - 5 1) - -(define_function_unit "lsu2" 2 0 - (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "power4")) - 1 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "integer") - (eq_attr "cpu" "power4")) - 2 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul,lmul") - (eq_attr "cpu" "power4")) - 7 6) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul2") - (eq_attr "cpu" "power4")) - 5 4) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "imul3") - (eq_attr "cpu" "power4")) - 4 3) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "power4")) - 36 35) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "ldiv") - (eq_attr "cpu" "power4")) - 68 67) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "idiv") - (eq_attr "cpu" "power4")) - 36 35) - -(define_function_unit "imuldiv" 1 0 - (and (eq_attr "type" "ldiv") - (eq_attr "cpu" "power4")) - 
68 67) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "compare") - (eq_attr "cpu" "power4")) - 3 1) - -(define_function_unit "iu2" 2 0 - (and (eq_attr "type" "delayed_compare") - (eq_attr "cpu" "power4")) - 4 1) - -(define_function_unit "bpu" 1 0 - (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "power4")) - 3 1) - -(define_function_unit "bpu" 1 0 - (and (eq_attr "type" "jmpreg,branch") - (eq_attr "cpu" "power4")) - 2 1) - -(define_function_unit "cru" 1 0 - (and (eq_attr "type" "cr_logical") - (eq_attr "cpu" "power4")) - 4 1) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "fp,dmul") - (eq_attr "cpu" "power4")) - 6 1) - -; adjust_cost increases the cost of dependent branches, -; so shave a few cycles off for fpcompare. -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "power4")) - 5 1) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "sdiv,ddiv") - (eq_attr "cpu" "power4")) - 33 28) - -(define_function_unit "fpu2" 2 0 - (and (eq_attr "type" "ssqrt,dsqrt") - (eq_attr "cpu" "power4")) - 40 35) +(automata_option "ndfa") + +(include "rios1.md") +(include "rios2.md") +(include "rs64.md") +(include "mpc.md") +(include "40x.md") +(include "603.md") +(include "6xx.md") +(include "7xx.md") +(include "7450.md") +(include "power4.md") ;; Start with fixed-point load and store insns. 
Here we put only the more @@ -1062,7 +283,7 @@ "@ lha%U1%X1 %0,%1 extsh %0,%1" - [(set_attr "type" "load,*")]) + [(set_attr "type" "load_ext,*")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -1196,7 +417,7 @@ "@ lwa%U1%X1 %0,%1 extsw %0,%1" - [(set_attr "type" "load,*")]) + [(set_attr "type" "load_ext,*")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -1664,7 +885,7 @@ "@ lha%U1%X1 %0,%1 {exts|extsh} %0,%1" - [(set_attr "type" "load,*")]) + [(set_attr "type" "load_ext,*")]) (define_insn "" [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y") @@ -8475,8 +7696,8 @@ "") (define_insn "*movcc_internal1" - [(set (match_operand:CC 0 "nonimmediate_operand" "=y,x,y,r,r,r,cl,q,r,r,m") - (match_operand:CC 1 "nonimmediate_operand" "y,r,r,x,y,r,r,r,h,m,r"))] + [(set (match_operand:CC 0 "nonimmediate_operand" "=y,x,?y,r,r,r,r,q,cl,r,m") + (match_operand:CC 1 "nonimmediate_operand" "y,r,r,x,y,r,h,r,r,m,r"))] "register_operand (operands[0], CCmode) || register_operand (operands[1], CCmode)" "@ @@ -8486,12 +7707,12 @@ mfcr %0 mfcr %0\;{rlinm|rlwinm} %0,%0,%f1,0xf0000000 mr %0,%1 + mf%1 %0 mt%0 %1 mt%0 %1 - mf%1 %0 {l%U1%X1|lwz%U1%X1} %0,%1 {st%U0%U1|stw%U0%U1} %1,%0" - [(set_attr "type" "cr_logical,cr_logical,cr_logical,cr_logical,cr_logical,*,*,mtjmpr,*,load,store") + [(set_attr "type" "cr_logical,mtcr,mtcr,mfcr,mfcr,*,*,*,mtjmpr,load,store") (set_attr "length" "4,4,12,4,8,4,4,4,4,4,4")]) ;; For floating-point, we normally deal with the floating-point registers @@ -10146,18 +9367,7 @@ "@ ldux %3,%0,%2 ldu %3,%2(%0)" - [(set_attr "type" "load")]) - -(define_insn "*movdi_update2" - [(set (match_operand:DI 3 "gpc_reg_operand" "=r") - (sign_extend:DI - (mem:SI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0") - (match_operand:DI 2 "gpc_reg_operand" "r"))))) - (set (match_operand:DI 0 "gpc_reg_operand" "=b") - (plus:DI (match_dup 1) (match_dup 2)))] - "TARGET_POWERPC64" - "lwaux %3,%0,%2" - [(set_attr "type" "load")]) + [(set_attr 
"type" "load_ux,load_u")]) (define_insn "movdi_update" [(set (mem:DI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0,0") @@ -10169,7 +9379,7 @@ "@ stdux %3,%0,%2 stdu %3,%2(%0)" - [(set_attr "type" "store")]) + [(set_attr "type" "store_ux,store_u")]) (define_insn "*movsi_update1" [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") @@ -10181,7 +9391,18 @@ "@ {lux|lwzux} %3,%0,%2 {lu|lwzu} %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) + +(define_insn "*movsi_update2" + [(set (match_operand:DI 3 "gpc_reg_operand" "=r") + (sign_extend:DI + (mem:SI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0") + (match_operand:DI 2 "gpc_reg_operand" "r"))))) + (set (match_operand:DI 0 "gpc_reg_operand" "=b") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_POWERPC64" + "lwaux %3,%0,%2" + [(set_attr "type" "load_ext_ux")]) (define_insn "movsi_update" [(set (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10193,9 +9414,9 @@ "@ {stux|stwux} %3,%0,%2 {stu|stwu} %3,%2(%0)" - [(set_attr "type" "store")]) + [(set_attr "type" "store_ux,store_u")]) -(define_insn "*movhi_update" +(define_insn "*movhi_update1" [(set (match_operand:HI 3 "gpc_reg_operand" "=r,r") (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") (match_operand:SI 2 "reg_or_short_operand" "r,I")))) @@ -10205,7 +9426,7 @@ "@ lhzux %3,%0,%2 lhzu %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) (define_insn "*movhi_update2" [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") @@ -10218,7 +9439,7 @@ "@ lhzux %3,%0,%2 lhzu %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) (define_insn "*movhi_update3" [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") @@ -10231,7 +9452,7 @@ "@ lhaux %3,%0,%2 lhau %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ext_ux,load_ext_u")]) (define_insn "*movhi_update4" [(set (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10243,7 +9464,7 @@ 
"@ sthux %3,%0,%2 sthu %3,%2(%0)" - [(set_attr "type" "store")]) + [(set_attr "type" "store_ux,store_u")]) (define_insn "*movqi_update1" [(set (match_operand:QI 3 "gpc_reg_operand" "=r,r") @@ -10255,7 +9476,7 @@ "@ lbzux %3,%0,%2 lbzu %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) (define_insn "*movqi_update2" [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r") @@ -10268,7 +9489,7 @@ "@ lbzux %3,%0,%2 lbzu %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) (define_insn "*movqi_update3" [(set (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10280,7 +9501,7 @@ "@ stbux %3,%0,%2 stbu %3,%2(%0)" - [(set_attr "type" "store")]) + [(set_attr "type" "store_ux,store_u")]) (define_insn "*movsf_update1" [(set (match_operand:SF 3 "gpc_reg_operand" "=f,f") @@ -10292,7 +9513,7 @@ "@ lfsux %3,%0,%2 lfsu %3,%2(%0)" - [(set_attr "type" "fpload")]) + [(set_attr "type" "fpload_ux,fpload_u")]) (define_insn "*movsf_update2" [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10304,7 +9525,7 @@ "@ stfsux %3,%0,%2 stfsu %3,%2(%0)" - [(set_attr "type" "fpstore")]) + [(set_attr "type" "fpstore_ux,fpstore_u")]) (define_insn "*movsf_update3" [(set (match_operand:SF 3 "gpc_reg_operand" "=r,r") @@ -10316,7 +9537,7 @@ "@ {lux|lwzux} %3,%0,%2 {lu|lwzu} %3,%2(%0)" - [(set_attr "type" "load")]) + [(set_attr "type" "load_ux,load_u")]) (define_insn "*movsf_update4" [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10328,7 +9549,7 @@ "@ {stux|stwux} %3,%0,%2 {stu|stwu} %3,%2(%0)" - [(set_attr "type" "store")]) + [(set_attr "type" "store_ux,store_u")]) (define_insn "*movdf_update1" [(set (match_operand:DF 3 "gpc_reg_operand" "=f,f") @@ -10340,7 +9561,7 @@ "@ lfdux %3,%0,%2 lfdu %3,%2(%0)" - [(set_attr "type" "fpload")]) + [(set_attr "type" "fpload_ux,fpload_u")]) (define_insn "*movdf_update2" [(set (mem:DF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0") @@ -10352,7 +9573,7 
@@ "@ stfdux %3,%0,%2 stfdu %3,%2(%0)" - [(set_attr "type" "fpstore")]) + [(set_attr "type" "fpstore_ux,fpstore_u")]) ;; Peephole to convert two consecutive FP loads or stores into lfq/stfq. @@ -11668,7 +10889,7 @@ (match_operand:SI 2 "reg_or_short_operand" "rI")))] "" "{cmp%I2|cmpw%I2} %0,%1,%2" - [(set_attr "type" "compare")]) + [(set_attr "type" "cmp")]) (define_insn "*cmpdi_internal1" [(set (match_operand:CC 0 "cc_reg_operand" "=y") @@ -11676,7 +10897,7 @@ (match_operand:DI 2 "reg_or_short_operand" "rI")))] "TARGET_POWERPC64" "cmpd%I2 %0,%1,%2" - [(set_attr "type" "compare")]) + [(set_attr "type" "cmp")]) ;; If we are comparing a register for equality with a large constant, ;; we can do this with an XOR followed by a compare. But we need a scratch @@ -11712,7 +10933,7 @@ (match_operand:SI 2 "reg_or_u_short_operand" "rK")))] "" "{cmpl%I2|cmplw%I2} %0,%1,%b2" - [(set_attr "type" "compare")]) + [(set_attr "type" "cmp")]) (define_insn "*cmpdi_internal2" [(set (match_operand:CCUNS 0 "cc_reg_operand" "=y") @@ -11720,7 +10941,7 @@ (match_operand:DI 2 "reg_or_u_short_operand" "rK")))] "" "cmpld%I2 %0,%1,%b2" - [(set_attr "type" "compare")]) + [(set_attr "type" "cmp")]) ;; The following two insns don't exist as single insns, but if we provide ;; them, we can swap an add and compare, which will enable us to overlap more @@ -11808,7 +11029,7 @@ (const_int 0)]))] "" "%D1mfcr %0\;{rlinm|rlwinm} %0,%0,%J1,1" - [(set_attr "type" "cr_logical") + [(set_attr "type" "mfcr") (set_attr "length" "12")]) ;; Same as above, but get the OV/ORDERED bit. 
@@ -11817,7 +11038,8 @@ (unspec:SI [(match_operand 1 "cc_reg_operand" "y")] 724))] "TARGET_ISEL" "%D1mfcr %0\;{rlinm|rlwinm} %0,%0,%t1,1" - [(set_attr "length" "12")]) + [(set_attr "type" "mfcr") + (set_attr "length" "12")]) (define_insn "" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") @@ -11826,7 +11048,7 @@ (const_int 0)]))] "TARGET_POWERPC64" "%D1mfcr %0\;{rlinm|rlwinm} %0,%0,%J1,1" - [(set_attr "type" "cr_logical") + [(set_attr "type" "mfcr") (set_attr "length" "12")]) (define_insn "" @@ -11883,7 +11105,7 @@ return \"%D1mfcr %0\;{rlinm|rlwinm} %0,%0,%4,%5,%5\"; }" - [(set_attr "type" "cr_logical") + [(set_attr "type" "mfcr") (set_attr "length" "12")]) (define_insn "" @@ -11955,7 +11177,7 @@ (const_int 0)]))] "REGNO (operands[2]) != REGNO (operands[5])" "%D1%D4mfcr %3\;{rlinm|rlwinm} %0,%3,%J1,1\;{rlinm|rlwinm} %3,%3,%J4,1" - [(set_attr "type" "cr_logical") + [(set_attr "type" "mfcr") (set_attr "length" "20")]) (define_peephole @@ -11969,7 +11191,7 @@ (const_int 0)]))] "TARGET_POWERPC64 && REGNO (operands[2]) != REGNO (operands[5])" "%D1%D4mfcr %3\;{rlinm|rlwinm} %0,%3,%J1,1\;{rlinm|rlwinm} %3,%3,%J4,1" - [(set_attr "type" "cr_logical") + [(set_attr "type" "mfcr") (set_attr "length" "20")]) ;; There are some scc insns that can be done directly, without a compare. 
@@ -14080,7 +13302,7 @@ [(set (pc) (if_then_else (match_operator 1 "branch_comparison_operator" [(match_operand 2 - "cc_reg_operand" "x,?y") + "cc_reg_operand" "y") (const_int 0)]) (label_ref (match_operand 0 "" "")) (pc)))] @@ -14095,7 +13317,7 @@ [(set (pc) (if_then_else (match_operator 0 "branch_comparison_operator" [(match_operand 1 - "cc_reg_operand" "x,?y") + "cc_reg_operand" "y") (const_int 0)]) (return) (pc)))] @@ -14111,7 +13333,7 @@ [(set (pc) (if_then_else (match_operator 1 "branch_comparison_operator" [(match_operand 2 - "cc_reg_operand" "x,?y") + "cc_reg_operand" "y") (const_int 0)]) (pc) (label_ref (match_operand 0 "" ""))))] @@ -14126,7 +13348,7 @@ [(set (pc) (if_then_else (match_operator 0 "branch_comparison_operator" [(match_operand 1 - "cc_reg_operand" "x,?y") + "cc_reg_operand" "y") (const_int 0)]) (pc) (return)))] @@ -14145,56 +13367,57 @@ ; (eq:SI (reg:CCFP 68) (const_int 0))) ; (const_int 1))) ; which are generated by the branch logic. +; Prefer destructive operations where BT = BB (for crXX BT,BA,BB) (define_insn "" - [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") (compare:CCEQ (match_operator:SI 1 "boolean_operator" - [(match_operator:SI 2 + [(match_operator:SI 2 "branch_positive_comparison_operator" [(match_operand 3 - "cc_reg_operand" "y") + "cc_reg_operand" "y,y") (const_int 0)]) - (match_operator:SI 4 + (match_operator:SI 4 "branch_positive_comparison_operator" [(match_operand 5 - "cc_reg_operand" "y") + "cc_reg_operand" "0,y") (const_int 0)])]) (const_int 1)))] "" "cr%q1 %E0,%j2,%j4" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "cr_logical,delayed_cr")]) ; Why is the constant -1 here, but 1 in the previous pattern? ; Because ~1 has all but the low bit set. 
(define_insn "" - [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") (compare:CCEQ (match_operator:SI 1 "boolean_or_operator" - [(not:SI (match_operator:SI 2 + [(not:SI (match_operator:SI 2 "branch_positive_comparison_operator" [(match_operand 3 - "cc_reg_operand" "y") + "cc_reg_operand" "y,y") (const_int 0)])) (match_operator:SI 4 "branch_positive_comparison_operator" [(match_operand 5 - "cc_reg_operand" "y") + "cc_reg_operand" "0,y") (const_int 0)])]) (const_int -1)))] "" "cr%q1 %E0,%j2,%j4" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "cr_logical,delayed_cr")]) (define_insn "" - [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") + [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y") (compare:CCEQ (match_operator:SI 1 "branch_positive_comparison_operator" [(match_operand 2 - "cc_reg_operand" "y") + "cc_reg_operand" "0,y") (const_int 0)]) (const_int 0)))] "!TARGET_SPE" "{crnor %E0,%j1,%j1|crnot %E0,%j1}" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "cr_logical,delayed_cr")]) ;; If we are comparing the result of two comparisons, this can be done ;; using creqv or crxor. @@ -14877,7 +14100,7 @@ (reg:CC 72) (reg:CC 73) (reg:CC 74) (reg:CC 75)] 19))] "" "mfcr %0" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "mfcr")]) (define_insn "*stmw" [(match_parallel 0 "stmw_operation" @@ -14950,9 +14173,9 @@ operands[4] = GEN_INT (mask); return \"mtcrf %4,%2\"; }" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "mtcr")]) -(define_insn "" +(define_insn "*mtcrfsi" [(set (match_operand:CC 0 "cc_reg_operand" "=y") (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") (match_operand 2 "immediate_operand" "n")] 20))] @@ -14961,7 +14184,7 @@ && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 << (75 - REGNO (operands[0]))" "mtcrf %R0,%1" - [(set_attr "type" "cr_logical")]) + [(set_attr "type" "mtcr")]) ; The load-multiple instructions have similar properties. 
; Note that "load_multiple" is a name known to the machine-independent diff --git a/gcc/config/rs6000/rs64.md b/gcc/config/rs6000/rs64.md new file mode 100644 index 0000000..e304094 --- /dev/null +++ b/gcc/config/rs6000/rs64.md @@ -0,0 +1,103 @@ +(define_automaton "rs64,rs64fp,rs64other") +(define_cpu_unit "iu_rs64" "rs64") +(define_cpu_unit "mciu_rs64" "rs64") +(define_cpu_unit "fpu_rs64" "rs64fp") +(define_cpu_unit "lsu_rs64,bpu_rs64" "rs64other") + +;; RS64a 64-bit IU, LSU, FPU, BPU + +(define_insn_reservation "rs64a-load" 2 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-store" 1 + (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-fpload" 3 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "rs64a")) + "lsu_rs64") + +(define_insn_reservation "rs64a-integer" 1 + (and (eq_attr "type" "integer") + (eq_attr "cpu" "rs64a")) + "iu_rs64") + +(define_insn_reservation "rs64a-imul" 20 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*13") + +(define_insn_reservation "rs64a-imul2" 12 + (and (eq_attr "type" "imul2") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*5") + +(define_insn_reservation "rs64a-imul3" 8 + (and (eq_attr "type" "imul3") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*2") + +(define_insn_reservation "rs64a-lmul" 34 + (and (eq_attr "type" "lmul") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*34") + +(define_insn_reservation "rs64a-idiv" 66 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*66") + +(define_insn_reservation "rs64a-ldiv" 66 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "rs64a")) + "mciu_rs64*66") + +(define_insn_reservation "rs64a-compare" 3 + (and (eq_attr "type" "compare,delayed_compare") + (eq_attr "cpu" "rs64a")) + "iu_rs64,nothing,bpu_rs64") + +(define_insn_reservation "rs64a-fpcompare" 5 + 
(and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64,bpu_rs64") + +(define_insn_reservation "rs64a-fp" 4 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64") + +(define_insn_reservation "rs64a-sdiv" 31 + (and (eq_attr "type" "sdiv,ddiv") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64*31") + +(define_insn_reservation "rs64a-sqrt" 49 + (and (eq_attr "type" "ssqrt,dsqrt") + (eq_attr "cpu" "rs64a")) + "mciu_rs64,fpu_rs64*49") + +(define_insn_reservation "rs64a-mfcr" 2 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "rs64a")) + "mciu_rs64") + +(define_insn_reservation "rs64a-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "rs64a")) + "mciu_rs64") + +(define_insn_reservation "rs64a-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "rs64a")) + "bpu_rs64") + +(define_insn_reservation "rs64a-jmpreg" 1 + (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr") + (eq_attr "cpu" "rs64a")) + "bpu_rs64") + -- cgit v1.1