diff options
author | Matthew Gretton-Dann <matthew.gretton-dann@arm.com> | 2011-11-30 13:51:35 +0000 |
---|---|---|
committer | Matthew Gretton-Dann <mgretton@gcc.gnu.org> | 2011-11-30 13:51:35 +0000 |
commit | ef5784915f07bf18222c76fb035e7ef4b2f90462 (patch) | |
tree | 231db9739cf93bbd5f25110e6cc37709bb003cd0 /gcc | |
parent | c819d921c9b0ccb6d29fb3a6c5cb6cd12b77d2f6 (diff) | |
download | gcc-ef5784915f07bf18222c76fb035e7ef4b2f90462.zip gcc-ef5784915f07bf18222c76fb035e7ef4b2f90462.tar.gz gcc-ef5784915f07bf18222c76fb035e7ef4b2f90462.tar.bz2 |
arm.c (arm_issue_rate): Cortex-A15 can triple issue.
* gcc/config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue.
* gcc/config/arm/arm.md (mul64): New attribute.
(generic_sched): Cortex-A15 is not scheduled generically.
(cortex-a15.md): Include.
* gcc/config/arm/cortex-a15.md: New machine description.
* gcc/config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md.
From-SVN: r181843
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 3 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 10 | ||||
-rw-r--r-- | gcc/config/arm/cortex-a15.md | 186 | ||||
-rw-r--r-- | gcc/config/arm/t-arm | 1 |
5 files changed, 208 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 53ac833..606cbe9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. + * config/arm/arm.md (mul64): New attribute. + (generic_sched): Cortex-A15 is not scheduled generically. + (cortex-a15.md): Include. + * config/arm/cortex-a15.md: New machine description. + * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. + +2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed. 2011-11-30 Iain Sandoe <iains@gcc.gnu.org> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index ee26c51..a57494c 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24127,6 +24127,9 @@ arm_issue_rate (void) { switch (arm_tune) { + case cortexa15: + return 3; + case cortexr4: case cortexr4f: case cortexr5: diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 7ac3f5c..8ec9b22 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -355,6 +355,13 @@ (const_string "mult") (const_string "alu"))) +; Is this an (integer side) multiply with a 64-bit result? +(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) + ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) @@ -518,7 +525,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -544,6 +551,7 @@ (include "cortex-a5.md") (include "cortex-a8.md") (include "cortex-a9.md") +(include "cortex-a15.md") (include "cortex-r4.md") (include "cortex-r4f.md") (include "cortex-m4.md") diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md new file mode 100644 index 0000000..ccab7cb --- /dev/null +++ b/gcc/config/arm/cortex-a15.md @@ -0,0 +1,186 @@ +;; ARM Cortex-A15 pipeline description +;; Copyright (C) 2011 Free Software Foundation, Inc. +;; +;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com> + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "cortex_a15") + +;; The Cortex-A15 core is modelled as a triple issue pipeline that has +;; the following dispatch units. +;; 1. Two pipelines for simple integer operations: SX1, SX2 +;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2 +;; 3. One pipeline for branch operations: BX +;; 4. One pipeline for integer multiply and divide operations: MX +;; 5. Two pipelines for load and store operations: LS1, LS2 +;; +;; We can issue into three pipelines per-cycle. +;; +;; We assume that where we have unit pairs xx1 is always filled before xx2. + +;; The three issue units +(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") + +(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") +(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") +(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") +(final_presence_set "ca15_i1" "ca15_i0") +(final_presence_set "ca15_i2" "ca15_i1") + +;; The main dispatch units +(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") +(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15") +(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") +(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") + +(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") + +;; The extended load-store pipeline +(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") + +;; The extended ALU pipeline +(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") +(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") + +;; Simple Execution Unit: +;; +;; Simple ALU without shift +(define_insn_reservation "cortex_a15_alu" 2 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu") + (eq_attr "neon_type" "none"))) + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") + +;; ALU ops with immediate shift +(define_insn_reservation "cortex_a15_alu_shift" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu_shift") + (eq_attr "neon_type" "none"))) + "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ + |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") + +;; ALU ops with register controlled shift +(define_insn_reservation "cortex_a15_alu_shift_reg" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "alu_shift_reg") + (eq_attr "neon_type" "none"))) + "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ + |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ + |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") + +;; Multiply Execution Unit: +;; +;; 32-bit multiplies +(define_insn_reservation "cortex_a15_mult32" 3 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "mult") + (and (eq_attr "neon_type" "none") + (eq_attr "mul64" "no")))) + "ca15_issue1,ca15_mx") + +;; 64-bit multiplies +(define_insn_reservation "cortex_a15_mult64" 4 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "mult") + (and (eq_attr "neon_type" "none") + (eq_attr "mul64" "yes")))) + "ca15_issue1,ca15_mx*2") + +;; Integer divide +(define_insn_reservation "cortex_a15_udiv" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "insn" "udiv")) + "ca15_issue1,ca15_mx") + +(define_insn_reservation "cortex_a15_sdiv" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "insn" "sdiv")) + "ca15_issue1,ca15_mx") + +;; Block all issue pipes for a cycle +(define_insn_reservation "cortex_a15_block" 1 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "block") + (eq_attr "neon_type" "none"))) + "ca15_issue3") + +;; Branch execution Unit +;; +;; Branches take one issue slot. +;; No latency as there is no result +(define_insn_reservation "cortex_a15_branch" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "branch") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_bx") + + +;; We lie with calls. They take up all issue slots, and form a block in the +;; pipeline. The result however is available the next cycle. +;; +;; Addition of new units requires this to be updated. +(define_insn_reservation "cortex_a15_call" 1 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "call") + (eq_attr "neon_type" "none"))) + "ca15_issue3,\ + ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\ + ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\ + +ca15_sx2_sat+ca15_ldr+ca15_str") + +;; Load-store execution Unit +;; +;; Loads of up to two words. +(define_insn_reservation "cortex_a15_load1" 4 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "load_byte,load1,load2") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_ls,ca15_ldr,nothing") + +;; Loads of three or four words. +(define_insn_reservation "cortex_a15_load3" 5 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "load3,load4") + (eq_attr "neon_type" "none"))) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") + +;; Stores of up to two words. +(define_insn_reservation "cortex_a15_store1" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "store1,store2") + (eq_attr "neon_type" "none"))) + "ca15_issue1,ca15_ls,ca15_str") + +;; Stores of three or four words. +(define_insn_reservation "cortex_a15_store3" 0 + (and (eq_attr "tune" "cortexa15") + (and (eq_attr "type" "store3,store4") + (eq_attr "neon_type" "none"))) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") + +;; Simple execution unit bypasses +(define_bypass 1 "cortex_a15_alu" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_load1,cortex_a15_load3") diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 4b94a7e..1128d19 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -31,6 +31,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/cirrus.md \ $(srcdir)/config/arm/constraints.md \ + $(srcdir)/config/arm/cortex-a15.md \ $(srcdir)/config/arm/cortex-a5.md \ $(srcdir)/config/arm/cortex-a8.md \ $(srcdir)/config/arm/cortex-a8-neon.md \ |