author     Richard Sandiford <richard.sandiford@arm.com>  2019-09-30 16:47:21 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>       2019-09-30 16:47:21 +0000
commit     ce9d2a37f2db20328286f5d3d5a13a4e765c59f7 (patch)
tree       e33d3b12ab823d3002276f63b725cc03e05af85e
parent     6d1e98dfd2bfce30640d71df355bedf114229744 (diff)
[AArch64] Allow shrink-wrapping of non-leaf vector PCS functions
With the function ABI stuff, we can now support shrink-wrapping of
non-leaf vector PCS functions.  This is particularly useful if the
vector PCS function calls an ordinary function on an error path,
since we can then keep the extra saves and restores specific to that
path too.

2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
	Delete.
	* config/aarch64/aarch64.c (aarch64_components_for_bb): Check
	whether the block calls a function that clobbers more registers
	than the current function is allowed to.
	(aarch64_use_simple_return_insn_p): Delete.
	* config/aarch64/aarch64.md (simple_return): Remove condition.

gcc/testsuite/
	* gcc.target/aarch64/torture/simd-abi-9.c: New test.

From-SVN: r276340
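For illustration only, a minimal sketch (not part of the patch; names are
placeholders) of the shape of function that benefits: a vector PCS function
whose only call to an ordinary, base-PCS function sits on an error path.
The base-PCS callee may clobber vector state that the vector PCS requires
the caller to preserve (the upper halves of v8-v15 and all of v16-v23), so
with shrink-wrapping the corresponding saves and restores can be confined
to that path.  The new simd-abi-9.c test below exercises the same pattern.

    int ordinary_callee (void);

    int __attribute__ ((aarch64_vector_pcs))
    vpcs_caller (int *status)
    {
      if (*status)
        /* Error path: the only call to a base-PCS function.  The stp/ldp
           of q8-q23 needed around it can now stay on this path.  */
        return ordinary_callee () + 1;

      /* Fast path: no call, so no extra vector saves/restores here.  */
      return 0;
    }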
-rw-r--r--  gcc/ChangeLog                                           10
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h                      1
-rw-r--r--  gcc/config/aarch64/aarch64.c                            36
-rw-r--r--  gcc/config/aarch64/aarch64.md                            2
-rw-r--r--  gcc/testsuite/ChangeLog                                  4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c   48
6 files changed, 83 insertions(+), 18 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 28e411c..7b0bcef 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
+ Delete.
+ * config/aarch64/aarch64.c (aarch64_components_for_bb): Check
+ whether the block calls a function that clobbers more registers
+ than the current function is allowed to.
+ (aarch64_use_simple_return_insn_p): Delete.
+ * config/aarch64/aarch64.md (simple_return): Remove condition.
+
+2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
+
* function-abi.h (function_abi_aggregator): New class.
* function-abi.cc (function_abi_aggregator::caller_save_regs): New
function.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a870eb7..c9a3423 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -531,7 +531,6 @@ bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
-bool aarch64_use_simple_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2d4cd37..3da92a2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5976,13 +5976,30 @@ aarch64_components_for_bb (basic_block bb)
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
bitmap_clear (components);
+ /* Clobbered registers don't generate values in any meaningful sense,
+ since nothing after the clobber can rely on their value. And we can't
+ say that partially-clobbered registers are unconditionally killed,
+ because whether they're killed or not depends on the mode of the
+ value they're holding. Thus partially call-clobbered registers
+ appear in neither the kill set nor the gen set.
+
+ Check manually for any calls that clobber more of a register than the
+ current function can. */
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
if ((!call_used_or_fixed_reg_p (regno)
|| (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
- && (bitmap_bit_p (in, regno)
- || bitmap_bit_p (gen, regno)
- || bitmap_bit_p (kill, regno)))
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+ || bitmap_bit_p (in, regno)
+ || bitmap_bit_p (gen, regno)
+ || bitmap_bit_p (kill, regno)))
{
unsigned regno2, offset, offset2;
bitmap_set_bit (components, regno);
@@ -6648,19 +6665,6 @@ aarch64_use_return_insn_p (void)
return known_eq (cfun->machine->frame.frame_size, 0);
}
-/* Return false for non-leaf SIMD functions in order to avoid
- shrink-wrapping them. Doing this will lose the necessary
- save/restore of FP registers. */
-
-bool
-aarch64_use_simple_return_insn_p (void)
-{
- if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
- return false;
-
- return true;
-}
-
/* Generate the epilogue instructions for returning from a function.
This is almost exactly the reverse of the prolog sequence, except
that we need to insert barriers to avoid scheduling loads that read
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index edeaa6f..fcba5ac 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -799,7 +799,7 @@
(define_insn "simple_return"
[(simple_return)]
- "aarch64_use_simple_return_insn_p ()"
+ ""
"ret"
[(set_attr "type" "branch")]
)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2ac61ff..ce3a967 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/torture/simd-abi-9.c: New test.
+
+2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/torture/simd-abi-8.c: New test.
2019-09-30 Richard Sandiford <richard.sandiford@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c
new file mode 100644
index 0000000..aaa0316
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-fshrink-wrap -ffat-lto-objects" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+int callee (void);
+
+/*
+** caller:
+** ldr (w[0-9]+), \[x0\]
+** cbn?z \1, [^\n]*
+** ...
+** ret
+*/
+int __attribute__ ((aarch64_vector_pcs))
+caller (int *x)
+{
+ if (*x)
+ return callee () + 1;
+ else
+ return 0;
+}
+
+/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */
+/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */
+/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */
+/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */
+/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */
+/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */
+/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */
+/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */
+/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */
+/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */
+/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */
+/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */
+/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */
+/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */
+/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */
+/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */
+
+/* { dg-final { scan-assembler-not {\tstp\tq[0-7],} } } */
+/* { dg-final { scan-assembler-not {\tldp\tq[0-7],} } } */
+/* { dg-final { scan-assembler-not {\tstp\tq2[4-9],} } } */
+/* { dg-final { scan-assembler-not {\tldp\tq2[4-9],} } } */
+/* { dg-final { scan-assembler-not {\tstp\td} } } */
+/* { dg-final { scan-assembler-not {\tldp\td} } } */
+/* { dg-final { scan-assembler-not {\tstr\tq} } } */
+/* { dg-final { scan-assembler-not {\tldr\tq} } } */