aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreta Yorsh <greta.yorsh@arm.com>2012-12-21 09:49:58 +0000
committerGreta Yorsh <gretay@gcc.gnu.org>2012-12-21 09:49:58 +0000
commitffeffdcb913ff6273c6bb95dd4872cd9a4e492c7 (patch)
treec00a2c28032b1f4e0dfcec843d572510b968e776
parent2e612eb2e2fca6e3db59b098e975559c72704141 (diff)
downloadgcc-ffeffdcb913ff6273c6bb95dd4872cd9a4e492c7.zip
gcc-ffeffdcb913ff6273c6bb95dd4872cd9a4e492c7.tar.gz
gcc-ffeffdcb913ff6273c6bb95dd4872cd9a4e492c7.tar.bz2
cortex-a7.md: New file.
gcc/ 2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com> * config/arm/cortex-a7.md: New file. * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md. * config/arm/arm.md: Include cortex-a7.md. (generic_sched): Don't use generic scheduler for Cortex-A7. (generic_vfp): Likewise. * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder. (arm_sched_reorder,cortexa7_sched_reorder): New function. (cortexa7_older_only,cortexa7_younger): Likewise. (arm_issue_rate): Add Cortex-A7. From-SVN: r194656
-rw-r--r--gcc/ChangeLog12
-rw-r--r--gcc/config/arm/arm.c163
-rw-r--r--gcc/config/arm/arm.md5
-rw-r--r--gcc/config/arm/cortex-a7.md353
-rw-r--r--gcc/config/arm/t-arm1
5 files changed, 532 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 29cab03..90ecae0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com>
+
+ * config/arm/cortex-a7.md: New file.
+ * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
+ * config/arm/arm.md: Include cortex-a7.md.
+ (generic_sched): Don't use generic scheduler for Cortex-A7.
+ (generic_vfp): Likewise.
+ * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
+ (arm_sched_reorder,cortexa7_sched_reorder): New function.
+ (cortexa7_older_only,cortexa7_younger): Likewise.
+ (arm_issue_rate): Add Cortex-A7.
+
2012-12-20 Ian Bolton <ian.bolton@arm.com>
* gcc/config/aarch64/aarch64.md
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 4484bc9..13d745f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
+static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence);
@@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER arm_sched_reorder
+
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost
@@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
}
+
+/* Return true if and only if INSN can dual-issue only as older. Unrecognized insns and movs never qualify; otherwise the insn type decides. */
+static bool
+cortexa7_older_only (rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ return false;
+
+ if (get_attr_insn (insn) == INSN_MOV)
+ return false; /* A mov can issue as younger (see cortexa7_younger), so it is not older-only. */
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU_REG:
+ case TYPE_LOAD_BYTE:
+ case TYPE_LOAD1:
+ case TYPE_STORE1:
+ case TYPE_FFARITHS:
+ case TYPE_FADDS:
+ case TYPE_FFARITHD:
+ case TYPE_FADDD:
+ case TYPE_FCPYS:
+ case TYPE_F_CVT:
+ case TYPE_FCMPS:
+ case TYPE_FCMPD:
+ case TYPE_FCONSTS:
+ case TYPE_FCONSTD:
+ case TYPE_FMULS:
+ case TYPE_FMACS:
+ case TYPE_FMULD:
+ case TYPE_FMACD:
+ case TYPE_FDIVS:
+ case TYPE_FDIVD:
+ case TYPE_F_2_R:
+ case TYPE_F_FLAG:
+ case TYPE_F_LOADS:
+ case TYPE_F_STORES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Return true if and only if INSN can dual-issue as younger. FILE and VERBOSE are used only to trace unrecognized insns when VERBOSE > 5. */
+static bool
+cortexa7_younger (FILE *file, int verbose, rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
+ return false;
+ }
+
+ if (get_attr_insn (insn) == INSN_MOV)
+ return true;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SIMPLE_ALU_IMM:
+ case TYPE_SIMPLE_ALU_SHIFT:
+ case TYPE_BRANCH:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/* Look for an instruction that can dual-issue only as an older
+ instruction, and move it in front of any instructions that can
+ dual-issue as younger, while preserving the relative order of all
+ other instructions in the ready list. This is a heuristic to help
+ dual-issue in later cycles, by postponing issue of more flexible
+ instructions. This heuristic may affect dual-issue opportunities
+ in the current cycle. READY is modified in place; *N_READYP and CLOCK are only read. */
+static void
+cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+ int clock)
+{
+ int i;
+ int first_older_only = -1, first_younger = -1;
+
+ if (verbose > 5)
+ fprintf (file,
+ ";; sched_reorder for cycle %d with %d insns in ready list\n",
+ clock,
+ *n_readyp);
+
+ /* Traverse the ready list from the head (the instruction to issue
+ first, stored at the highest index), looking for the first
+ instruction that can issue as younger and the first instruction
+ that can dual-issue only as older. Stop at the first older-only insn. */
+ for (i = *n_readyp - 1; i >= 0; i--)
+ {
+ rtx insn = ready[i];
+ if (cortexa7_older_only (insn))
+ {
+ first_older_only = i;
+ if (verbose > 5)
+ fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
+ break;
+ }
+ else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
+ first_younger = i;
+ }
+
+ /* Nothing to reorder because either no younger insn found or insn
+ that can dual-issue only as older appears before any insn that
+ can dual-issue as younger. */
+ if (first_younger == -1)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
+ return;
+ }
+
+ /* Nothing to reorder because no older-only insn in the ready list. */
+ if (first_older_only == -1)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
+ return;
+ }
+
+ /* Move first_older_only insn before first_younger: shift the entries above it, up to and including first_younger, one index down and store the insn at index first_younger. */
+ if (verbose > 5)
+ fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
+ INSN_UID(ready [first_older_only]),
+ INSN_UID(ready [first_younger]));
+ rtx first_older_only_insn = ready [first_older_only];
+ for (i = first_older_only; i < first_younger; i++)
+ {
+ ready[i] = ready[i+1];
+ }
+
+ ready[i] = first_older_only_insn;
+ return;
+}
+
+/* Implement TARGET_SCHED_REORDER. May reorder READY in place when tuning for Cortex-A7; always returns arm_issue_rate (). */
+static int
+arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+ int clock)
+{
+ switch (arm_tune)
+ {
+ case cortexa7:
+ cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
+ break;
+ default:
+ /* Do nothing for other cores. */
+ break;
+ }
+
+ return arm_issue_rate ();
+}
+
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new
@@ -25480,6 +25642,7 @@ arm_issue_rate (void)
case cortexr5:
case genericv7a:
case cortexa5:
+ case cortexa7:
case cortexa8:
case cortexa9:
case fa726te:
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 385a58d..1cb1515 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -502,7 +502,7 @@
(define_attr "generic_sched" "yes,no"
(const (if_then_else
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4")
(eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
@@ -510,7 +510,7 @@
(define_attr "generic_vfp" "yes,no"
(const (if_then_else
(and (eq_attr "fpu" "vfp")
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@@ -527,6 +527,7 @@
(include "fmp626.md")
(include "fa726te.md")
(include "cortex-a5.md")
+(include "cortex-a7.md")
(include "cortex-a8.md")
(include "cortex-a9.md")
(include "cortex-a15.md")
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md
new file mode 100644
index 0000000..74d4ca0
--- /dev/null
+++ b/gcc/config/arm/cortex-a7.md
@@ -0,0 +1,353 @@
+;; ARM Cortex-A7 pipeline description
+;; Copyright (C) 2012 Free Software Foundation, Inc.
+;;
+;; Contributed by ARM Ltd.
+;; Based on cortex-a5.md which was originally contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a7")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Functional units.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The Cortex-A7 integer and vfp pipeline.
+;; The decode is the same for all instructions, so do not model it.
+;; We only model the first execution stage because
+;; instructions always advance one stage per cycle in order.
+;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together.
+
+(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7")
+
+(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2")
+
+(define_cpu_unit "cortex_a7_branch" "cortex_a7")
+
+;; Cortex-A7 is in order and can dual-issue under limited circumstances.
+;; ex2 can be reserved only after ex1 is reserved.
+
+(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1")
+
+;; Pseudo-unit for blocking the multiply pipeline when a double-precision
+;; multiply is in progress.
+
+(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7")
+
+;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
+;; of the add pipeline by fmac instructions, etc.
+
+(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7")
+
+;; Floating-point div/sqrt (long latency, out-of-order completion).
+
+(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7")
+
+;; Neon pipeline
+(define_cpu_unit "cortex_a7_neon" "cortex_a7")
+
+(define_reservation "cortex_a7_all" "cortex_a7_both+\
+ cortex_a7_fpmul_pipe+\
+ cortex_a7_fpadd_pipe+\
+ cortex_a7_fp_div_sqrt+\
+ cortex_a7_neon")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branches.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; A direct branch can dual issue either as younger or older instruction,
+;; but branches cannot dual issue with branches.
+;; No latency as there is no result.
+
+(define_insn_reservation "cortex_a7_branch" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "branch")
+ (eq_attr "neon_type" "none")))
+ "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch")
+
+;; A call reserves all issue slots. The result is available the next cycle.
+(define_insn_reservation "cortex_a7_call" 1
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "call")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_all")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instruction with an immediate operand can dual-issue.
+(define_insn_reservation "cortex_a7_alu_imm" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (ior (eq_attr "type" "simple_alu_imm")
+ (ior (eq_attr "type" "simple_alu_shift")
+ (and (eq_attr "insn" "mov")
+ (not (eq_attr "length" "8")))))
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex2|cortex_a7_ex1")
+
+;; ALU instruction with register operands can dual-issue
+;; with a younger immediate-based instruction.
+(define_insn_reservation "cortex_a7_alu_reg" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "alu_reg")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_alu_shift" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "alu_shift,alu_shift_reg")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;; Forwarding path for unshifted operands.
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul")
+
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_store*"
+ "arm_no_early_store_addr_dep")
+
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_alu_shift"
+ "arm_no_early_alu_shift_dep")
+
+;; The multiplier pipeline can forward results from wr stage only so
+;; there's no need to specify bypasses.
+;; Multiply instructions cannot dual-issue.
+
+(define_insn_reservation "cortex_a7_mul" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "mult")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+;; The latency depends on the operands, so we use an estimate here.
+(define_insn_reservation "cortex_a7_idiv" 5
+ (and (eq_attr "tune" "cortexa7")
+ (eq_attr "insn" "udiv,sdiv"))
+ "cortex_a7_all*5")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/store instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Address-generation happens in the issue stage.
+;; Double-word accesses can be issued in a single cycle,
+;; and occupy only one pipeline stage.
+
+(define_insn_reservation "cortex_a7_load1" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load_byte,load1")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_store1" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store1")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_load2" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load2")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_store2" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store2")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_load3" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load3")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_store3" 0 ;; Same issue pattern as cortex_a7_load3.
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store3")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_load4" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load4")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_store4" 0 ;; Same issue pattern as cortex_a7_load4.
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store4")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_both")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point arithmetic.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_fpalu" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\
+ f_cvt, fcmps, fcmpd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpadd_pipe")
+
+;; For fconsts and fconstd, 8-bit immediate data is passed directly from
+;; f1 to f3 (which I think reduces the latency by one cycle).
+
+(define_insn_reservation "cortex_a7_fconst" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fconsts,fconstd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpadd_pipe")
+
+;; We should try not to attempt to issue a single-precision multiplication in
+;; the middle of a double-precision multiplication operation (the usage of
+;; cortex_a7_fpmul_pipe).
+
+(define_insn_reservation "cortex_a7_fpmuls" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmuls")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe")
+
+;; For single-precision multiply-accumulate, the add (accumulate) is issued
+;; whilst the multiply is in F4. The multiply result can then be forwarded
+;; from F5 to F1. The issue unit is only used once (when we first start
+;; processing the instruction), but the usage of the FP add pipeline could
+;; block other instructions attempting to use it simultaneously. We try to
+;; avoid that using cortex_a7_fpadd_pipe.
+
+(define_insn_reservation "cortex_a7_fpmacs" 8
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmacs")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+
+;; Non-multiply instructions can issue between two cycles of a
+;; double-precision multiply.
+
+(define_insn_reservation "cortex_a7_fpmuld" 7
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmuld")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
+ cortex_a7_ex1+cortex_a7_fpmul_pipe")
+
+(define_insn_reservation "cortex_a7_fpmacd" 11
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmacd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
+ cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point divide/square root instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_fdivs" 16
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fdivs")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
+
+(define_insn_reservation "cortex_a7_fdivd" 29
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fdivd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP to/from core transfers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Core-to-VFP transfers.
+
+(define_insn_reservation "cortex_a7_r2f" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "r_2_f")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_f2r" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_2_r")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP flag transfer.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Fixme: The flag forwarding from fmstat to the second instruction is
+;; not modeled at present.
+
+(define_insn_reservation "cortex_a7_f_flags" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_flag")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP load/store.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_f_loads" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_loads")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_f_loadd" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_loadd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_f_stores" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_stores")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_f_stored" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_stored")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+;; Load-to-use for floating-point values has a penalty of one cycle,
+;; i.e. a latency of two.
+
+(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
+ "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
+ cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
+ cortex_a7_f2r")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; NEON instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+(define_insn_reservation "cortex_a7_neon" 4
+ (and (eq_attr "tune" "cortexa7")
+ (eq_attr "neon_type" "!none"))
+ "cortex_a7_both*2")
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 731b614..2ceb938 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/constraints.md \
$(srcdir)/config/arm/cortex-a15.md \
$(srcdir)/config/arm/cortex-a5.md \
+ $(srcdir)/config/arm/cortex-a7.md \
$(srcdir)/config/arm/cortex-a8.md \
$(srcdir)/config/arm/cortex-a8-neon.md \
$(srcdir)/config/arm/cortex-a9.md \