aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Wilco Dijkstra <wdijkstr@arm.com> 2017-01-11 16:38:42 +0000
committer: Wilco Dijkstra <wilco@gcc.gnu.org> 2017-01-11 16:38:42 +0000
commit: d2471b3168260e0c467672b308a2bb5eda10ff95 (patch)
tree: 41d764d66c719c39a698fd982608534d57c2593f
parent: dbd8471076ba7d89e3c62a6f2fc82cc3abc82a9d (diff)
download: gcc-d2471b3168260e0c467672b308a2bb5eda10ff95.zip
          gcc-d2471b3168260e0c467672b308a2bb5eda10ff95.tar.gz
          gcc-d2471b3168260e0c467672b308a2bb5eda10ff95.tar.bz2
My previous change to the Cortex-A53 scheduler resulted in a 13% regression on a proprietary benchmark.
My previous change to the Cortex-A53 scheduler resulted in a 13% regression on a proprietary benchmark. This turned out to be due to non-optimal scheduling of int to float conversions. This patch separates int to FP transfers from int to float conversions based on experiments to determine the best schedule. As a result of these tweaks the performance of the benchmark improves by 20%.

gcc/
	* config/arm/cortex-a53.md: Add bypasses for cortex_a53_r2f_cvt.
	(cortex_a53_r2f): Only use for transfers.
	(cortex_a53_f2r): Likewise.
	(cortex_a53_r2f_cvt): Add reservation for conversions.
	(cortex_a53_f2r_cvt): Likewise.

From-SVN: r244322
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/config/arm/cortex-a53.md 37
2 files changed, 35 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bdf19ee..d0aae19 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2017-01-11 Wilco Dijkstra <wdijkstr@arm.com>
+
+ * config/arm/cortex-a53.md: Add bypasses for
+ cortex_a53_r2f_cvt.
+ (cortex_a53_r2f): Only use for transfers.
+ (cortex_a53_f2r): Likewise.
+ (cortex_a53_r2f_cvt): Add reservation for conversions.
+ (cortex_a53_f2r_cvt): Likewise.
+
2017-01-11 Tamar Christina <tamar.christina@arm.com>
* config/arm/arm_neon.h: Add __artificial__ and gnu_inline
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index fbec5df..7cf5fc5 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -260,9 +260,18 @@
"cortex_a53_r2f")
(define_bypass 1 "cortex_a53_mul,
- cortex_a53_load*"
+ cortex_a53_load1,
+ cortex_a53_load2"
"cortex_a53_r2f")
+(define_bypass 2 "cortex_a53_alu*"
+ "cortex_a53_r2f_cvt")
+
+(define_bypass 3 "cortex_a53_mul,
+ cortex_a53_load1,
+ cortex_a53_load2"
+ "cortex_a53_r2f_cvt")
+
;; Model flag forwarding to branches.
(define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
@@ -522,19 +531,25 @@
;; Floating-point to/from core transfers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn_reservation "cortex_a53_r2f" 6
+(define_insn_reservation "cortex_a53_r2f" 2
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "f_mcr,f_mcrr"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
+
+(define_insn_reservation "cortex_a53_f2r" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "f_mrc,f_mrrc"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
+
+(define_insn_reservation "cortex_a53_r2f_cvt" 4
(and (eq_attr "tune" "cortexa53")
- (eq_attr "type" "f_mcr,f_mcrr,f_cvti2f,
- neon_from_gp, neon_from_gp_q"))
- "cortex_a53_slot_any,cortex_a53_store,
- nothing,cortex_a53_fp_alu")
+ (eq_attr "type" "f_cvti2f, neon_from_gp, neon_from_gp_q"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
-(define_insn_reservation "cortex_a53_f2r" 6
+(define_insn_reservation "cortex_a53_f2r_cvt" 5
(and (eq_attr "tune" "cortexa53")
- (eq_attr "type" "f_mrc,f_mrrc,f_cvtf2i,
- neon_to_gp, neon_to_gp_q"))
- "cortex_a53_slot_any,cortex_a53_fp_alu,
- nothing,cortex_a53_store")
+ (eq_attr "type" "f_cvtf2i, neon_to_gp, neon_to_gp_q"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point flag transfer.