aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJiong Wang <jiwang@gcc.gnu.org>2015-08-06 16:08:39 +0000
committerJiong Wang <jiwang@gcc.gnu.org>2015-08-06 16:08:39 +0000
commitb2074542354ae9cd2166dc3d0529edf71b9ae8e6 (patch)
tree85b92d1dc9bfcdf08b06afb7e66e63116496e627 /gcc
parent201fa0b3a0a4aaf53ec0bcd154f40e2f63a8d960 (diff)
downloadgcc-b2074542354ae9cd2166dc3d0529edf71b9ae8e6.zip
gcc-b2074542354ae9cd2166dc3d0529edf71b9ae8e6.tar.gz
gcc-b2074542354ae9cd2166dc3d0529edf71b9ae8e6.tar.bz2
[AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt
The instruction sequences that prepare the argument for the TLS descriptor runtime resolver, and the later function call to the resolver, can actually be hoisted out of the loop. Currently we can't do that because we have exposed the hard register X0 as the destination of a "set", and GCC's RTL data flow infrastructure skips, or makes very conservative assumptions about, insns that involve hard registers, so some loop IV opportunities are missed. This patch adds another pattern, "tlsdesc_small_pseudo_<mode>", and avoids exposing x0 to GCC's generic code. Generally, we define a new register class FIXED_REG0 which only contains register 0, so the instruction sequence generated from the newly added pattern is the same as for tlsdesc_small_<mode>, while operand 0 is wrapped as a pseudo register that RTL IV opt can handle. Ideally, we should allow operand 0 to be any pseudo register, but then we couldn't model the overwrite of x0 caused by the function call, which is hidden by the UNSPEC. So here we restrict operand 0 to be x0, so that the overwrite of x0 is visible to GCC. 2015-08-06 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Jiong Wang <jiong.wang@arm.com> gcc/ * config/aarch64/aarch64.md (tlsdesc_small_pseudo_<mode>): New pattern. * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise. (aarch64_register_move_cost): Likewise. (aarch64_load_symref_appropriately): Invoke the newly added pattern if possible. * config/aarch64/constraints.md (Uc0): New constraint. gcc/testsuite/ * gcc.target/aarch64/tlsdesc_hoist.c: New testcase. From-SVN: r226683
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog13
-rw-r--r--gcc/config/aarch64/aarch64.c24
-rw-r--r--gcc/config/aarch64/aarch64.md17
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c22
5 files changed, 75 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 795867a..ffcd947 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2015-08-06 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+ Jiong Wang <jiong.wang@arm.com>
+
+ * config/aarch64/aarch64.md (tlsdesc_small_pseudo_<mode>): New pattern.
+ * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
+ (REG_CLASS_NAMES): Likewise.
+ (REG_CLASS_CONTENTS): Likewise.
+ * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
+ (aarch64_register_move_cost): Likewise.
+ (aarch64_load_symref_appropriately): Invoke the new added pattern if
+ possible.
+ * config/aarch64/constraints.md (Uc0): New constraint.
+
2015-08-06 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/constraints.md (Usf): Add the test of
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e991a49..6da7245 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1048,12 +1048,26 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
gcc_assert (mode == Pmode || mode == ptr_mode);
- /* In ILP32, the got entry is always of SImode size. Unlike
- small GOT, the dest is fixed at reg 0. */
- if (TARGET_ILP32)
- emit_insn (gen_tlsdesc_small_si (imm));
+ if (can_create_pseudo_p ())
+ {
+ rtx reg = gen_reg_rtx (mode);
+
+ if (TARGET_ILP32)
+ emit_insn (gen_tlsdesc_small_pseudo_si (imm, reg));
+ else
+ emit_insn (gen_tlsdesc_small_pseudo_di (imm, reg));
+
+ emit_use (reg);
+ }
else
- emit_insn (gen_tlsdesc_small_di (imm));
+ {
+ /* In ILP32, the got entry is always of SImode size. Unlike
+ small GOT, the dest is fixed at reg 0. */
+ if (TARGET_ILP32)
+ emit_insn (gen_tlsdesc_small_si (imm));
+ else
+ emit_insn (gen_tlsdesc_small_di (imm));
+ }
tp = aarch64_load_tp (NULL);
if (mode != Pmode)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7f99753..5e8f210 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4549,6 +4549,23 @@
[(set_attr "type" "call")
(set_attr "length" "16")])
+;; The same as tlsdesc_small_<mode> except that we don't expose hard register X0
+;; as the destination of the set, as that would cause trouble for RTL loop iv.
+;; RTL loop iv aborts the ongoing optimization once it finds a hard reg
+;; as the destination of a set.  This pattern thus helps these tlsdesc
+;; instruction sequences to be hoisted out of the loop.
+(define_insn "tlsdesc_small_pseudo_<mode>"
+ [(set (match_operand:PTR 1 "register_operand" "=r")
+ (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
+ UNSPEC_TLSDESC))
+ (clobber (reg:DI R0_REGNUM))
+ (clobber (reg:DI LR_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_TLS_DESC"
+ "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+ [(set_attr "type" "call")
+ (set_attr "length" "16")])
+
(define_insn "stack_tie"
[(set (mem:BLK (scratch))
(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0cd004b..0d4937c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
2015-08-06 Jiong Wang <jiong.wang@arm.com>
+ * gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
+
+2015-08-06 Jiong Wang <jiong.wang@arm.com>
+
* gcc.target/aarch64/noplt_3.c: New testcase.
2015-08-06 Jiong Wang <jiong.wang@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
new file mode 100644
index 0000000..a1fd3b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target tls_native } */
+/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */
+/* { dg-skip-if "-mcmodel=large, no support for -fpic" { aarch64-*-* } { "-mcmodel=large" } { "" } } */
+
+int cal (int, int);
+__thread int tls_data;
+
+int
+foo (int bound)
+{
+ int i = 0;
+ int sum = 0;
+
+ for (i; i < bound; i++)
+ sum = cal (sum, tls_data);
+
+ return sum;
+}
+
+/* Insn sequences for TLS descriptor should be hoisted out of the loop. */
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */