aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Wilco Dijkstra <wdijkstr@arm.com> 2018-02-08 12:32:51 +0000
committer: Wilco Dijkstra <wilco@gcc.gnu.org> 2018-02-08 12:32:51 +0000
commit: 3f26f054872c375e7f9a80ede7a56036d9b57597 (patch)
tree: ddeddc6ae38f34532a1bc37c7d1e137dc788ad01 /gcc
parent: 18fbe394d62371bedaa41ed32c89c659109ae8f5 (diff)
download: gcc-3f26f054872c375e7f9a80ede7a56036d9b57597.zip
gcc-3f26f054872c375e7f9a80ede7a56036d9b57597.tar.gz
gcc-3f26f054872c375e7f9a80ede7a56036d9b57597.tar.bz2
[AArch64] Use more LDP/STP in shrinkwrapping
The shrinkwrap optimization added in GCC 7 allows each callee-save to be delayed and done only across blocks which need a particular callee-save. Although this reduces unnecessary memory traffic on code paths that need few callee-saves, it typically uses LDR/STR rather than LDP/STP. This means more memory accesses and increased code size, ~1.0% on average.

To improve this, if a particular callee-save must be saved/restored, also add the adjacent callee-save to allow use of LDP/STP. This significantly reduces code size (for example gcc_r, povray_r, parest_r, xalancbmk_r are 1% smaller). This is a simple fix which can be backported. A more advanced approach would scan blocks for pairs of callee-saves, but that requires a full rewrite of all the callee-save code, which is too late at this stage.

An example epilog in a shrinkwrapped function before:

    ldp x21, x22, [sp,#16]
    ldr x23, [sp,#32]
    ldr x24, [sp,#40]
    ldp x25, x26, [sp,#48]
    ldr x27, [sp,#64]
    ldr x28, [sp,#72]
    ldr x30, [sp,#80]
    ldr d8, [sp,#88]
    ldp x19, x20, [sp],#96
    ret

And after this patch:

    ldr d8, [sp,#88]
    ldp x21, x22, [sp,#16]
    ldp x23, x24, [sp,#32]
    ldp x25, x26, [sp,#48]
    ldp x27, x28, [sp,#64]
    ldr x30, [sp,#80]
    ldp x19, x20, [sp],#96
    ret

gcc/
    * config/aarch64/aarch64.c (aarch64_components_for_bb):
    Increase LDP/STP opportunities by adding adjacent callee-saves.

From-SVN: r257482
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/config/aarch64/aarch64.c 17
2 files changed, 21 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c9a9fdb..ef2f46e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2018-02-08 Wilco Dijkstra <wdijkstr@arm.com>
+ * config/aarch64/aarch64.c (aarch64_components_for_bb):
+ Increase LDP/STP opportunities by adding adjacent callee-saves.
+
+2018-02-08 Wilco Dijkstra <wdijkstr@arm.com>
+
PR rtl-optimization/84068
PR rtl-optimization/83459
* haifa-sched.c (rank_for_schedule): Fix SCHED_PRESSURE_MODEL sorting.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7c9c6e5..228fd1b9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4552,7 +4552,22 @@ aarch64_components_for_bb (basic_block bb)
&& (bitmap_bit_p (in, regno)
|| bitmap_bit_p (gen, regno)
|| bitmap_bit_p (kill, regno)))
- bitmap_set_bit (components, regno);
+ {
+ unsigned regno2, offset, offset2;
+ bitmap_set_bit (components, regno);
+
+ /* If there is a callee-save at an adjacent offset, add it too
+ to increase the use of LDP/STP. */
+ offset = cfun->machine->frame.reg_offset[regno];
+ regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
+
+ if (regno2 <= LAST_SAVED_REGNUM)
+ {
+ offset2 = cfun->machine->frame.reg_offset[regno2];
+ if ((offset & ~8) == (offset2 & ~8))
+ bitmap_set_bit (components, regno2);
+ }
+ }
return components;
}