aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2025-04-29 16:07:37 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2025-04-29 16:07:37 +0100
commit68a75e3c0dcdb883cc96b626b541da20ab4df2f2 (patch)
tree41f760b89269b9fd12c04ca9b8fb810b9a473f94 /gcc
parent972a03737284b8611ec4e6315f6ca04d56ec05bf (diff)
downloadgcc-68a75e3c0dcdb883cc96b626b541da20ab4df2f2.zip
gcc-68a75e3c0dcdb883cc96b626b541da20ab4df2f2.tar.gz
gcc-68a75e3c0dcdb883cc96b626b541da20ab4df2f2.tar.bz2
aarch64: Fix CFA offsets in non-initial stack probes [PR119610]
PR119610 is about incorrect CFI output for a stack probe when that probe is not the initial allocation.  The main aarch64 stack probe function, aarch64_allocate_and_probe_stack_space, implicitly assumed that the incoming stack pointer pointed to the top of the frame, and thus held the CFA.

aarch64_save_callee_saves and aarch64_restore_callee_saves use a parameter called bytes_below_sp to track how far the stack pointer is above the base of the static frame.  This patch does the same thing for aarch64_allocate_and_probe_stack_space.

Also, I noticed that the SVE path was attaching the first CFA note to the wrong instruction: it was attaching the note to the calculation of the stack size, rather than to the r11<-sp copy.

gcc/
	PR target/119610
	* config/aarch64/aarch64.cc
	(aarch64_allocate_and_probe_stack_space): Add a bytes_below_sp
	parameter and use it to calculate the CFA offsets.  Attach the
	first SVE CFA note to the move into the associated temporary
	register.
	(aarch64_expand_prologue): Update calls accordingly.  Start out
	with bytes_below_sp set to the frame size and decrement it after
	each allocation.

gcc/testsuite/
	PR target/119610
	* g++.dg/torture/pr119610.C: New test.
	* g++.target/aarch64/sve/pr119610-sve.C: Likewise.

(cherry picked from commit fa61afef18a8566d1907a5ae0e7754e1eac207d9)
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64.cc66
-rw-r--r--gcc/testsuite/g++.dg/torture/pr119610.C18
-rw-r--r--gcc/testsuite/g++.target/aarch64/sve/pr119610-sve.C20
3 files changed, 78 insertions, 26 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 433ec97..08506e9 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9417,13 +9417,16 @@ aarch64_emit_stack_tie (rtx reg)
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
- registers. If POLY_SIZE is not large enough to require a probe this function
- will only adjust the stack. When allocating the stack space
- FRAME_RELATED_P is then used to indicate if the allocation is frame related.
- FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
- the saved registers. If we are then we ensure that any allocation
- larger than the ABI defined buffer needs a probe so that the
- invariant of having a 1KB buffer is maintained.
+ registers, given that the stack pointer is currently BYTES_BELOW_SP bytes
+ above the bottom of the static frame.
+
+ If POLY_SIZE is not large enough to require a probe this function will only
+ adjust the stack. When allocating the stack space FRAME_RELATED_P is then
+ used to indicate if the allocation is frame related. FINAL_ADJUSTMENT_P
+ indicates whether we are allocating the area below the saved registers.
+ If we are then we ensure that any allocation larger than the ABI defined
+ buffer needs a probe so that the invariant of having a 1KB buffer is
+ maintained.
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9440,6 +9443,7 @@ aarch64_emit_stack_tie (rtx reg)
static void
aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_int64 poly_size,
+ poly_int64 bytes_below_sp,
aarch64_isa_mode force_isa_mode,
bool frame_related_p,
bool final_adjustment_p)
@@ -9503,8 +9507,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_size, temp1, temp2, force_isa_mode,
false, true);
- rtx_insn *insn = get_last_insn ();
-
+ auto initial_cfa_offset = frame.frame_size - bytes_below_sp;
+ auto final_cfa_offset = initial_cfa_offset + poly_size;
if (frame_related_p)
{
/* This is done to provide unwinding information for the stack
@@ -9514,28 +9518,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
The tie will expand to nothing but the optimizers will not touch
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
- emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+ auto *insn = emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
aarch64_emit_stack_tie (stack_ptr_copy);
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
- add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_ptr_copy,
+ initial_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
rtx guard_const = gen_int_mode (guard_size, Pmode);
- insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
- stack_pointer_rtx, temp1,
- probe_const, guard_const));
+ auto *insn
+ = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
+ stack_pointer_rtx, temp1,
+ probe_const, guard_const));
/* Now reset the CFA register if needed. */
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- gen_int_mode (poly_size, Pmode)));
+ plus_constant (Pmode, stack_pointer_rtx,
+ final_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9581,12 +9588,13 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
We can determine which allocation we are doing by looking at
the value of FRAME_RELATED_P since the final allocations are not
frame related. */
+ auto cfa_offset = frame.frame_size - (bytes_below_sp - rounded_size);
if (frame_related_p)
{
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1, rounded_size));
+ plus_constant (Pmode, temp1, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9608,7 +9616,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, stack_pointer_rtx, rounded_size));
+ plus_constant (Pmode, stack_pointer_rtx, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9916,17 +9924,22 @@ aarch64_expand_prologue (void)
code below does not handle it for -fstack-clash-protection. */
gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
+ /* The offset of the current SP from the bottom of the static frame. */
+ poly_int64 bytes_below_sp = frame_size;
+
/* Will only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
- force_isa_mode, true, false);
+ bytes_below_sp, force_isa_mode,
+ true, false);
+ bytes_below_sp -= initial_adjust;
if (callee_adjust != 0)
- aarch64_push_regs (reg1, reg2, callee_adjust);
-
- /* The offset of the current SP from the bottom of the static frame. */
- poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ {
+ aarch64_push_regs (reg1, reg2, callee_adjust);
+ bytes_below_sp -= callee_adjust;
+ }
if (emit_frame_chain)
{
@@ -9994,7 +10007,7 @@ aarch64_expand_prologue (void)
|| known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
sve_callee_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, false);
bytes_below_sp -= sve_callee_adjust;
}
@@ -10005,10 +10018,11 @@ aarch64_expand_prologue (void)
/* We may need to probe the final adjustment if it is larger than the guard
that is assumed by the called. */
- gcc_assert (known_eq (bytes_below_sp, final_adjust));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, true);
+ bytes_below_sp -= final_adjust;
+ gcc_assert (known_eq (bytes_below_sp, 0));
if (emit_frame_chain && maybe_ne (final_adjust, 0))
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
diff --git a/gcc/testsuite/g++.dg/torture/pr119610.C b/gcc/testsuite/g++.dg/torture/pr119610.C
new file mode 100644
index 0000000..9998026
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr119610.C
@@ -0,0 +1,18 @@
+// { dg-do run }
+// { dg-additional-options "-fstack-protector-strong" { target fstack_protector } }
+// { dg-additional-options "-fstack-clash-protection" { target supports_stack_clash_protection } }
+
+int *ptr;
+void foo() {
+ int c[1024*128];
+ ptr = c;
+ throw 1;
+}
+int main()
+{
+ try {
+ foo();
+ } catch(int x) {
+ return 0;
+ }
+}
diff --git a/gcc/testsuite/g++.target/aarch64/sve/pr119610-sve.C b/gcc/testsuite/g++.target/aarch64/sve/pr119610-sve.C
new file mode 100644
index 0000000..0044e51
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/pr119610-sve.C
@@ -0,0 +1,20 @@
+// { dg-do run { target aarch64_sve_hw } }
+// { dg-additional-options "-fstack-protector-strong" { target fstack_protector } }
+// { dg-additional-options "-fstack-clash-protection" { target supports_stack_clash_protection } }
+
+void *a_ptr, *b_ptr;
+void foo() {
+ __SVInt32_t a;
+ int b[1024*128];
+ a_ptr = &a;
+ b_ptr = b;
+ throw 1;
+}
+int main()
+{
+ try {
+ foo();
+ } catch(int x) {
+ return 0;
+ }
+}