diff options
author | Pawel Kupczak <pawel.kupczak@intel.com> | 2025-08-28 11:50:15 +0000 |
---|---|---|
committer | Christina Schimpe <christina.schimpe@intel.com> | 2025-09-04 20:44:48 +0000 |
commit | 57ce06ac23a8b01d1bd9de2cbc1f79f75b96f0ca (patch) | |
tree | 09b72d23582cf08c139f65c73745f44bfdf81b26 | |
parent | 8862ee58868fc018b6fe179c3b6d61c30867d541 (diff) | |
download | binutils-57ce06ac23a8b01d1bd9de2cbc1f79f75b96f0ca.zip binutils-57ce06ac23a8b01d1bd9de2cbc1f79f75b96f0ca.tar.gz binutils-57ce06ac23a8b01d1bd9de2cbc1f79f75b96f0ca.tar.bz2 |
gdb, amd64: extend the amd64 prologue analyzer to skip stack alloc
Following the previous patch (gdb, amd64: extend the amd64 prologue
analyzer to skip register pushes), this patch extends the analyzer
further to be able to skip stack space allocation as the next prologue
part, for functions with a frame pointer. The implementation is based
on the i386 counterpart, which already has that functionality.
Currently, the stack allocation is not skipped. The examples below use
the following C source, compiled with gcc 11.4.0.
```
int foo (int n)
{
int ns[] = { 1, 4, 9, 16, 25 };
return ns[n];
}
int
main (int argc, char **argv)
{
return foo (argc);
}
```
Compiling with "gcc -O0 -fno-omit-frame-pointer", before this patch we get:
```
(gdb) b foo
Breakpoint 1 at 0x1151
(gdb) r
...
Breakpoint 1, 0x0000555555555151 in foo ()
(gdb) disassemble
Dump of assembler code for function foo:
0x0000555555555149 <+0>: endbr64
0x000055555555514d <+4>: push %rbp
0x000055555555514e <+5>: mov %rsp,%rbp
=> 0x0000555555555151 <+8>: sub $0x30,%rsp
0x0000555555555155 <+12>: mov %edi,-0x24(%rbp)
...
```
With this patch, the stack allocation is skipped the same way register pushes are:
```
(gdb) b foo
Breakpoint 1 at 0x1155
(gdb) r
...
Breakpoint 1, 0x0000555555555155 in foo ()
(gdb) disassemble
Dump of assembler code for function foo:
0x0000555555555149 <+0>: endbr64
0x000055555555514d <+4>: push %rbp
0x000055555555514e <+5>: mov %rsp,%rbp
0x0000555555555151 <+8>: sub $0x30,%rsp
=> 0x0000555555555155 <+12>: mov %edi,-0x24(%rbp)
...
```
Reviewed-By: Guinevere Larsen <guinevere@redhat.com>
Approved-By: Andrew Burgess <aburgess@redhat.com>
6 files changed, 172 insertions, 26 deletions
diff --git a/gdb/amd64-tdep.c b/gdb/amd64-tdep.c index b692565..1afdaa8 100755 --- a/gdb/amd64-tdep.c +++ b/gdb/amd64-tdep.c @@ -2639,6 +2639,71 @@ amd64_analyze_register_saves (CORE_ADDR pc, CORE_ADDR current_pc, return pc; } +/* Check whether PC points at code allocating space on the stack. + If so, update CACHE and return pc past it or CURRENT_PC, whichever is + smaller. Otherwise, return PC passed to this function. */ + +static CORE_ADDR +amd64_analyze_stack_alloc (gdbarch *arch, CORE_ADDR pc, CORE_ADDR current_pc, + amd64_frame_cache *cache) +{ + static const gdb_byte sub_imm8_rsp[] = { 0x83, 0xec }; + static const gdb_byte sub_imm32_rsp[] = { 0x81, 0xec }; + static const gdb_byte lea_disp_rsp[] = { 0x8D, 0x64 }; + + bfd_endian byte_order = gdbarch_byte_order (arch); + const CORE_ADDR start_pc = pc; + + gdb_byte op; + if (target_read_code (pc, &op, 1) == -1) + return pc; + + /* Check for REX.W, indicating 64-bit operand size (in this case, for + %rsp). */ + if (op == 0x48) + pc++; + + if (current_pc <= pc) + return current_pc; + + gdb_byte buf[2]; + read_code (pc, buf, 2); + + /* Check for instruction allocating space on the stack, which looks like + sub imm8/32, %rsp + or + lea -imm (%rsp), %rsp + + and forward pc past it + update cache. */ + + /* sub imm8, %rsp. */ + if (memcmp (buf, sub_imm8_rsp, 2) == 0) + { + /* Instruction is 3 bytes long. The imm8 arg is the 3rd, single + byte. */ + cache->sp_offset += read_code_integer (pc + 2, 1, byte_order); + return pc + 3; + } + /* sub imm32, %rsp. */ + else if (memcmp (buf, sub_imm32_rsp, 2) == 0) + { + /* Instruction is 6 bytes long. The imm32 arg is stored in 4 bytes, + starting from 3rd one. */ + cache->sp_offset += read_code_integer (pc + 2, 4, byte_order); + return pc + 6; + } + /* lea -imm (%rsp), %rsp. */ + else if (memcmp (buf, lea_disp_rsp, 2) == 0) + { + /* Instruction is 4 bytes long. The imm arg is the 4th, single + byte. 
*/ + cache->sp_offset += -1 * read_code_integer (pc + 3, 1, byte_order); + return pc + 4; + } + + return start_pc; +} + /* Do a limited analysis of the prologue at PC and update CACHE accordingly. Bail out early if CURRENT_PC is reached. Return the address where the analysis stopped. @@ -2684,7 +2749,8 @@ amd64_analyze_prologue (gdbarch *gdbarch, CORE_ADDR pc, CORE_ADDR current_pc, if (current_pc <= pc) return current_pc; - return amd64_analyze_register_saves (pc, current_pc, cache); + pc = amd64_analyze_register_saves (pc, current_pc, cache); + return amd64_analyze_stack_alloc (gdbarch, pc, current_pc, cache); } /* Work around false termination of prologue - GCC PR debug/48827. diff --git a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-no-cfi.S b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-no-cfi.S index bcebfe8..39cf3e6 100644 --- a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-no-cfi.S +++ b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-no-cfi.S @@ -34,28 +34,52 @@ foo: endbr64 pushq %rbp movq %rsp, %rbp + pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx - movl %edi, %r12d + subq $280, %rsp + movl %edi, %ebx movl %esi, %r14d movl %edx, %r13d + movl %ecx, %r12d + movq %fs:40, %rax + movq %rax, -56(%rbp) + xorl %eax, %eax +.L3: + leal (%rbx,%rax), %ecx + movslq %eax, %rdx + movb %cl, -320(%rbp,%rdx) + addl $1, %eax + cmpl $256, %eax + jne .L3 + movl %ebx, %edi call bar - movl %eax, %ebx + movl %eax, %r15d movl %r14d, %edi call bar - addl %eax, %ebx + leal (%r15,%rax), %r14d movl %r13d, %edi call bar + addl %eax, %r14d + movl %r12d, %edi + call bar + addl %r14d, %eax addl %ebx, %eax - addl %r12d, %eax + movq -56(%rbp), %rdx + subq %fs:40, %rdx + jne .L7 + addq $280, %rsp popq %rbx popq %r12 popq %r13 popq %r14 + popq %r15 popq %rbp ret +.L7: + call __stack_chk_fail@PLT .size foo, .-foo .globl main .type main, @function diff --git a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-offset.S 
b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-offset.S index 783ec33..ea001b3 100644 --- a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-offset.S +++ b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis-offset.S @@ -37,37 +37,59 @@ foo: pushq %r12 pushq %rbp pushq %rbx - movl %edi, %ebp + subq $272, %rsp + movl %edi, %ebx movl %esi, %r13d movl %edx, %r12d + movl %ecx, %ebp + movq %fs:40, %rax + movq %rax, 264(%rsp) + xorl %eax, %eax +.L3: + leal (%rbx,%rax), %ecx + movslq %eax, %rdx + movb %cl, (%rsp,%rdx) + addl $1, %eax + cmpl $256, %eax + jne .L3 + movl %ebx, %edi call bar - movl %eax, %ebx + movl %eax, %r14d movl %r13d, %edi call bar - addl %eax, %ebx + leal (%r14,%rax), %r13d movl %r12d, %edi call bar + addl %eax, %r13d + movl %ebp, %edi + call bar + addl %r13d, %eax addl %ebx, %eax - addl %ebp, %eax + movq 264(%rsp), %rdx + subq %fs:40, %rdx + jne .L7 + addq $272, %rsp popq %rbx popq %rbp popq %r12 popq %r13 popq %r14 ret +.L7: + call __stack_chk_fail@PLT .size foo, .-foo .globl main .type main, @function main: endbr64 - subq $16, %rsp + subq $24, %rsp leal (%rdi,%rdi), %ecx leal 2(%rdi), %edx leal 1(%rdi), %esi call foo movl %eax, 12(%rsp) movl 12(%rsp), %eax - addq $16, %rsp + addq $24, %rsp ret .size main, .-main .ident "GCC: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0" diff --git a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.S b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.S index c5ef4f6..691eee0 100644 --- a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.S +++ b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.S @@ -42,33 +42,60 @@ foo: .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 + pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx - .cfi_offset 14, -24 - .cfi_offset 13, -32 - .cfi_offset 12, -40 - .cfi_offset 3, -48 - movl %edi, %r12d + subq $280, %rsp + .cfi_offset 15, -24 + .cfi_offset 14, -32 + .cfi_offset 13, -40 + .cfi_offset 12, -48 + .cfi_offset 3, -56 + movl %edi, 
%ebx movl %esi, %r14d movl %edx, %r13d + movl %ecx, %r12d + movq %fs:40, %rax + movq %rax, -56(%rbp) + xorl %eax, %eax +.L3: + leal (%rbx,%rax), %ecx + movslq %eax, %rdx + movb %cl, -320(%rbp,%rdx) + addl $1, %eax + cmpl $256, %eax + jne .L3 + movl %ebx, %edi call bar - movl %eax, %ebx + movl %eax, %r15d movl %r14d, %edi call bar - addl %eax, %ebx + leal (%r15,%rax), %r14d movl %r13d, %edi call bar + addl %eax, %r14d + movl %r12d, %edi + call bar + addl %r14d, %eax addl %ebx, %eax - addl %r12d, %eax + movq -56(%rbp), %rdx + subq %fs:40, %rdx + jne .L7 + addq $280, %rsp popq %rbx popq %r12 popq %r13 popq %r14 + popq %r15 popq %rbp + .cfi_remember_state .cfi_def_cfa 7, 8 ret +.L7: + .cfi_restore_state + call __stack_chk_fail@PLT .cfi_endproc .LFE1: .size foo, .-foo diff --git a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.c b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.c index 1ce30b1..707b4fb 100644 --- a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.c +++ b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.c @@ -29,15 +29,22 @@ bar (int x) .cfi_def_cfa_register %rbp push %reg1 push %reg2 + sub $XXX, %rsp .cfi_offset %reg2, 32 .cfi_offset %reg1, 24 So to be able to unwind a register, GDB needs to skip prologue past - register pushes (to access .cfi directives). */ + register pushes and stack allocation (to access .cfi directives). */ int __attribute__ ((noinline)) foo (int a, int b, int c, int d) { - a += bar (a) + bar (b) + bar (c); + /* "volatile" alone isn't enough for clang to not optimize it out and + allocate space on the stack. 
*/ + volatile char s[256]; + for (int i = 0; i < 256; i++) + s[i] = (char) (a + i); + + a += bar (a) + bar (b) + bar (c) + bar (d); return a; } diff --git a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.exp b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.exp index 7ad0c60..b2fcbe6 100644 --- a/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.exp +++ b/gdb/testsuite/gdb.arch/amd64-extended-prologue-analysis.exp @@ -14,11 +14,11 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # # This test verifies that when placing a breakpoint on a function with a frame -# pointer, instructions that push callee-saved registers in the prologue are -# skipped, without debug info. When stopped on such breakpoint, the pushed -# registers should be able to be immediately unwound. With debug info present, -# GDB would try to use prologue-end markers found in the line table to -# determine where the prologue ends. +# pointer, instructions that push callee-saved registers and stack allocation +# in the prologue are skipped, without debug info. When stopped on such +# breakpoint, the pushed registers should be able to be immediately unwound. +# With debug info present, GDB would try to use prologue-end markers found in +# the line table to determine where the prologue ends. # # It is also tested both with and without .eh_frame's .cfi directives - with # them, GDB can only unwind a register once stopped after .cfi directive for |