author    | Pedro Alves <pedro@palves.net> | 2024-03-22 12:31:29 +0000
committer | Pedro Alves <pedro@palves.net> | 2024-03-22 12:31:29 +0000
commit    | 431a6b091d3106bc9750d82f50535d35d760f920 (patch)
tree      | f9e20141109e8157f1334b968d60babd99152bbd /gdb
parent    | 820a77554e64dd26d8ec2b03b59846268730c8be (diff)
Teach GDB to generate sparse core files (PR corefiles/31494)
This commit teaches GDB's gcore command to generate sparse core files
(if supported by the filesystem).
To create a sparse file, all you have to do is skip writing zeros to
the file, lseek'ing ahead over them instead.
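To illustrate the technique, here is a minimal POSIX sketch -- not the
patch's actual code, which goes through BFD; write_block_sparsely is a
hypothetical helper:

#include <string.h>
#include <unistd.h>

/* Sketch only: write LEN bytes at the current file offset, but leave
   a filesystem hole instead of writing when the buffer is one whole
   block of zeros.  */
static int
write_block_sparsely (int fd, const char *buf, size_t len)
{
  static const char zeros[4096];	/* one all-zero reference block */

  if (len == sizeof zeros && memcmp (buf, zeros, len) == 0)
    /* All zeros: lseek ahead instead of writing, creating a hole.  */
    return lseek (fd, (off_t) len, SEEK_CUR) < 0 ? -1 : 0;

  return write (fd, buf, len) == (ssize_t) len ? 0 : -1;
}

One subtlety the patch documents below: if a file ends in a hole,
something must still be written afterwards (or the file truncated up)
for it to get the right apparent size; gcore can rely on the note
data being written after the memory sections.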
The sparse logic is applied when writing the memory sections, as
that's where the bulk of the data and the zeros are.
The commit also tweaks gdb.base/bigcore.exp to make it exercise
gdb-generated cores in addition to kernel-generated cores. We
couldn't do that before, because GDB's gcore on that test's program
would generate a multi-GB non-sparse core (16GB on my system).
After this commit, gdb.base/bigcore.exp generates, when testing with
GDB's gcore, a much smaller core file, roughly in line with what the
kernel produces:
real sizes:
$ du --hu testsuite/outputs/gdb.base/bigcore/bigcore.corefile.*
2.2M testsuite/outputs/gdb.base/bigcore/bigcore.corefile.gdb
2.0M testsuite/outputs/gdb.base/bigcore/bigcore.corefile.kernel
apparent sizes:
$ du --hu --apparent-size testsuite/outputs/gdb.base/bigcore/bigcore.corefile.*
16G testsuite/outputs/gdb.base/bigcore/bigcore.corefile.gdb
16G testsuite/outputs/gdb.base/bigcore/bigcore.corefile.kernel
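The du numbers compare allocated blocks against apparent size.  The
same sparseness check can be done programmatically; a minimal sketch
using stat(2), where the file name is just an example (st_blocks
counts 512-byte units):

#include <stdio.h>
#include <sys/stat.h>

int
main (void)
{
  struct stat st;

  if (stat ("bigcore.corefile.gdb", &st) != 0)
    return 1;

  /* Bytes actually allocated on disk vs. the apparent file size.  */
  long long allocated = (long long) st.st_blocks * 512;

  printf ("apparent: %lld bytes, allocated: %lld bytes, %s\n",
	  (long long) st.st_size, allocated,
	  allocated < (long long) st.st_size ? "sparse" : "not sparse");
  return 0;
}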
Time to generate the core also goes down significantly. On my machine, I get:
when writing to an SSD, from 21.0s, down to 8.0s
when writing to an HDD, from 31.0s, down to 8.5s
The changes to gdb.base/bigcore.exp are smaller than they look at
first sight.  It's mostly refactoring -- moving most of the code to a
new procedure that takes as an argument who should dump the core, and
then calling the procedure twice.  I purposely did not modernize any
of the refactored code in this patch.
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31494
Reviewed-By: Lancelot Six <lancelot.six@amd.com>
Reviewed-By: Eli Zaretskii <eliz@gnu.org>
Reviewed-By: John Baldwin <jhb@FreeBSD.org>
Change-Id: I2554a6a4a72d8c199ce31f176e0ead0c0c76cff1
Diffstat (limited to 'gdb')
-rw-r--r-- | gdb/NEWS                           |   4
-rw-r--r-- | gdb/doc/gdb.texinfo                |   3
-rw-r--r-- | gdb/gcore.c                        | 186
-rw-r--r-- | gdb/testsuite/gdb.base/bigcore.exp | 238
4 files changed, 323 insertions, 108 deletions
diff --git a/gdb/NEWS b/gdb/NEWS
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -23,6 +23,10 @@ disassemble command will now give an error.  Previously the 'b' flag
   would always override the 'r' flag.
 
+gcore
+generate-core-file
+  GDB now generates sparse core files, on systems that support it.
+
 maintenance info line-table
   Add an EPILOGUE-BEGIN column to the output of the command.  It indicates
   if the line is considered the start of the epilgoue, and thus a point at
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index f093ee2..9224829 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -13867,6 +13867,9 @@ Produce a core dump of the inferior process.  The optional argument
 specified, the file name defaults to @file{core.@var{pid}}, where
 @var{pid} is the inferior process ID.
 
+If supported by the filesystem where the core is written to,
+@value{GDBN} generates a sparse core dump file.
+
 Note that this command is implemented only for some systems (as of
 this writing, @sc{gnu}/Linux, FreeBSD, Solaris, and S390).
diff --git a/gdb/gcore.c b/gdb/gcore.c
index 7c12aa3..3d3973b 100644
--- a/gdb/gcore.c
+++ b/gdb/gcore.c
@@ -39,10 +39,21 @@
 #include "gdbsupport/byte-vector.h"
 #include "gdbsupport/scope-exit.h"
 
+/* To generate sparse cores, we look at the data to write in chunks of
+   this size when considering whether to skip the write.  Only if we
+   have a full block of this size with all zeros do we skip writing
+   it.  A simpler algorithm that would try to skip all zeros would
+   result in potentially many more write/lseek syscalls, as normal
+   data is typically sprinkled with many small holes of zeros.  Also,
+   it's much more efficient to memcmp a block of data against an
+   all-zero buffer than to check each and every data byte against zero
+   one by one.  */
+#define SPARSE_BLOCK_SIZE 0x1000
+
 /* The largest amount of memory to read from the target at once.  We
    must throttle it to limit the amount of memory used by GDB during
    generate-core-file for programs with large resident data.  */
-#define MAX_COPY_BYTES (1024 * 1024)
+#define MAX_COPY_BYTES (256 * SPARSE_BLOCK_SIZE)
 
 static const char *default_gcore_target (void);
 static enum bfd_architecture default_gcore_arch (void);
@@ -98,7 +109,12 @@ write_gcore_file_1 (bfd *obfd)
   bfd_set_section_alignment (note_sec, 0);
   bfd_set_section_size (note_sec, note_size);
 
-  /* Now create the memory/load sections. */
+  /* Now create the memory/load sections.  Note
+     gcore_memory_sections's sparse logic is assuming that we'll
+     always write something afterwards, which we do: just below, we
+     write the note section.  So there's no need for an ftruncate-like
+     call to grow the file to the right size if the last memory
+     sections were zeros and we skipped writing them.  */
   if (gcore_memory_sections (obfd) == 0)
     error (_("gcore: failed to get corefile memory sections from target."));
 
@@ -567,6 +583,167 @@ objfile_find_memory_regions (struct target_ops *self,
   return 0;
 }
 
+/* Check if we have a block full of zeros at DATA within the [DATA,
+   DATA+SIZE) buffer.  Returns the size of the all-zero block found.
+   Returns at most the minimum between SIZE and SPARSE_BLOCK_SIZE.  */
+
+static size_t
+get_all_zero_block_size (const gdb_byte *data, size_t size)
+{
+  size = std::min (size, (size_t) SPARSE_BLOCK_SIZE);
+
+  /* A memcmp of a whole block is much faster than a simple for loop.
+     This makes a big difference, as with a for loop, this code would
+     dominate the performance and result in doubling the time to
+     generate a core, at the time of writing.  With an optimized
+     memcmp, this doesn't even show up in the perf trace.  */
+  static const gdb_byte all_zero_block[SPARSE_BLOCK_SIZE] = {};
+  if (memcmp (data, all_zero_block, size) == 0)
+    return size;
+  return 0;
+}
+
+/* Basically a named-elements pair, used as return type of
+   find_next_all_zero_block.  */
+
+struct offset_and_size
+{
+  size_t offset;
+  size_t size;
+};
+
+/* Find the next all-zero block at DATA+OFFSET within the [DATA,
+   DATA+SIZE) buffer.  Returns the offset and the size of the all-zero
+   block if found, or zero if not found.  */
+
+static offset_and_size
+find_next_all_zero_block (const gdb_byte *data, size_t offset, size_t size)
+{
+  for (; offset < size; offset += SPARSE_BLOCK_SIZE)
+    {
+      size_t zero_block_size
+        = get_all_zero_block_size (data + offset, size - offset);
+      if (zero_block_size != 0)
+        return {offset, zero_block_size};
+    }
+  return {0, 0};
+}
+
+/* Wrapper around bfd_set_section_contents that avoids writing
+   all-zero blocks to disk, so we create a sparse core file.
+   SKIP_ALIGN is a recursion helper -- if true, we'll skip aligning
+   the file position to SPARSE_BLOCK_SIZE.  */
+
+static bool
+sparse_bfd_set_section_contents (bfd *obfd, asection *osec,
+                                 const gdb_byte *data,
+                                 size_t sec_offset,
+                                 size_t size,
+                                 bool skip_align = false)
+{
+  /* Note, we don't have to have special handling for the case of the
+     last memory region ending with zeros, because our caller always
+     writes out the note section after the memory/load sections.  If
+     it didn't, we'd have to seek+write the last byte to make the file
+     size correct.  (Or add an ftruncate abstraction to bfd and call
+     that.)  */
+
+  if (size == 0)
+    return true;
+
+  size_t data_offset = 0;
+
+  if (!skip_align)
+    {
+      /* Align the all-zero block search with SPARSE_BLOCK_SIZE, to
+         better align with filesystem blocks.  If we find we're
+         misaligned, then write/skip the bytes needed to make us
+         aligned.  We do that with (one level) recursion.  */
+
+      /* We need to know the section's file offset on disk.  We can
+         only look at it after the bfd's 'output_has_begun' flag has
+         been set, as bfd hasn't computed the file offsets
+         otherwise.  */
+      if (!obfd->output_has_begun)
+        {
+          gdb_byte dummy = 0;
+
+          /* A write forces BFD to compute the bfd's section file
+             positions.  Zero size works for that too.  */
+          if (!bfd_set_section_contents (obfd, osec, &dummy, 0, 0))
+            return false;
+
+          gdb_assert (obfd->output_has_begun);
+        }
+
+      /* How much after the last aligned offset are we writing at.  */
+      size_t aligned_offset_remainder
+        = (osec->filepos + sec_offset) % SPARSE_BLOCK_SIZE;
+
+      /* Do we need to align?  */
+      if (aligned_offset_remainder != 0)
+        {
+          /* How much we need to advance in order to find the next
+             SPARSE_BLOCK_SIZE filepos-aligned block.  */
+          size_t distance_to_next_aligned
+            = SPARSE_BLOCK_SIZE - aligned_offset_remainder;
+
+          /* How much we'll actually write in the recursion call.  The
+             caller may want us to write fewer bytes than
+             DISTANCE_TO_NEXT_ALIGNED.  */
+          size_t align_write_size = std::min (size, distance_to_next_aligned);
+
+          /* Recurse, skipping the alignment code.  */
+          if (!sparse_bfd_set_section_contents (obfd, osec, data,
+                                                sec_offset,
+                                                align_write_size, true))
+            return false;
+
+          /* Skip over what we've written, and proceed with
+             assumes-aligned logic.  */
+          data_offset += align_write_size;
+        }
+    }
+
+  while (data_offset < size)
+    {
+      size_t all_zero_block_size
+        = get_all_zero_block_size (data + data_offset, size - data_offset);
+      if (all_zero_block_size != 0)
+        {
+          /* Skip writing all-zero blocks.  */
+          data_offset += all_zero_block_size;
+          continue;
+        }
+
+      /* We have some non-zero data to write to file.  Find the next
+         all-zero block within the data, and only write up to it.  */
+
+      offset_and_size next_all_zero_block
+        = find_next_all_zero_block (data,
+                                    data_offset + SPARSE_BLOCK_SIZE,
+                                    size);
+      size_t next_data_offset = (next_all_zero_block.offset == 0
+                                 ? size
+                                 : next_all_zero_block.offset);
+
+      if (!bfd_set_section_contents (obfd, osec, data + data_offset,
+                                     sec_offset + data_offset,
+                                     next_data_offset - data_offset))
+        return false;
+
+      data_offset = next_data_offset;
+
+      /* If we already know we have an all-zero block at the next
+         offset, we can skip calling get_all_zero_block_size for
+         it again.  */
+      if (next_all_zero_block.offset != 0)
+        data_offset += next_all_zero_block.size;
+    }
+
+  return true;
+}
+
 static void
 gcore_copy_callback (bfd *obfd, asection *osec)
 {
@@ -599,8 +776,9 @@ gcore_copy_callback (bfd *obfd, asection *osec)
           bfd_section_vma (osec)));
       break;
     }
-  if (!bfd_set_section_contents (obfd, osec, memhunk.data (),
-                                 offset, size))
+
+  if (!sparse_bfd_set_section_contents (obfd, osec, memhunk.data (),
+                                        offset, size))
     {
       warning (_("Failed to write corefile contents (%s)."),
               bfd_errmsg (bfd_get_error ()));
diff --git a/gdb/testsuite/gdb.base/bigcore.exp b/gdb/testsuite/gdb.base/bigcore.exp
index 3f9ae48..6c64d40 100644
--- a/gdb/testsuite/gdb.base/bigcore.exp
+++ b/gdb/testsuite/gdb.base/bigcore.exp
@@ -43,23 +43,6 @@ if { [gdb_compile "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {deb
     return -1
 }
 
-# Run GDB on the bigcore program up-to where it will dump core.
-
-clean_restart ${binfile}
-gdb_test_no_output "set print sevenbit-strings"
-gdb_test_no_output "set width 0"
-
-# Get the core into the output directory.
-set_inferior_cwd_to_output_dir
-
-if {![runto_main]} {
-    return 0
-}
-set print_core_line [gdb_get_line_number "Dump core"]
-gdb_test "tbreak $print_core_line"
-gdb_test continue ".*print_string.*"
-gdb_test next ".*0 = 0.*"
-
 # Traverse part of bigcore's linked list of memory chunks (forward or
 # backward), saving each chunk's address.
 
@@ -92,92 +75,11 @@ proc extract_heap { dir } {
     }
     return $heap
 }
-set next_heap [extract_heap next]
-set prev_heap [extract_heap prev]
-
-# Save the total allocated size within GDB so that we can check
-# the core size later.
-gdb_test_no_output "set \$bytes_allocated = bytes_allocated" "save heap size"
-
-# Now create a core dump
-
-# Rename the core file to "TESTFILE.corefile" rather than just "core",
-# to avoid problems with sys admin types that like to regularly prune
-# all files named "core" from the system.
-
-# Some systems append "core" to the name of the program; others append
-# the name of the program to "core"; still others (like Linux, as of
-# May 2003) create cores named "core.PID".
-
-# Save the process ID.  Some systems dump the core into core.PID.
-set inferior_pid [get_inferior_pid]
-
-# Dump core using SIGABRT
-set oldtimeout $timeout
-set timeout 600
-gdb_test "signal SIGABRT" "Program terminated with signal SIGABRT, .*"
-set timeout $oldtimeout
-
-# Find the corefile.
-set file [find_core_file $inferior_pid]
-if { $file != "" } {
-    remote_exec build "mv $file $corefile"
-} else {
-    untested "can't generate a core file"
-    return 0
-}
-
-# Check that the corefile is plausibly large enough.  We're trying to
-# detect the case where the operating system has truncated the file
-# just before signed wraparound.  TCL, unfortunately, has a similar
-# problem - so use catch.  It can handle the "bad" size but not
-# necessarily the "good" one.  And we must use GDB for the comparison,
-# similarly.
-
-if {[catch {file size $corefile} core_size] == 0} {
-    set core_ok 0
-    gdb_test_multiple "print \$bytes_allocated < $core_size" "check core size" {
-        -re " = 1\r\n$gdb_prompt $" {
-            pass "check core size"
-            set core_ok 1
-        }
-        -re " = 0\r\n$gdb_prompt $" {
-            pass "check core size"
-            set core_ok 0
-        }
-    }
-} {
-    # Probably failed due to the TCL build having problems with very
-    # large values.  Since GDB uses a 64-bit off_t (when possible) it
-    # shouldn't have this problem.  Assume that things are going to
-    # work.  Without this assumption the test is skiped on systems
-    # (such as i386 GNU/Linux with patched kernel) which do pass.
-    pass "check core size"
-    set core_ok 1
-}
-if {! $core_ok} {
-    untested "check core size (system does not support large corefiles)"
-    return 0
-}
-
-# Now load up that core file
-
-set test "load corefile"
-gdb_test_multiple "core $corefile" "$test" {
-    -re "A program is being debugged already.  Kill it. .y or n. " {
-        send_gdb "y\n"
-        exp_continue
-    }
-    -re "Core was generated by.*$gdb_prompt $" {
-        pass "$test"
-    }
-}
-
-# Finally, re-traverse bigcore's linked list, checking each chunk's
-# address against the executable.  Don't use gdb_test_multiple as want
-# only one pass/fail.  Don't use exp_continue as the regular
-# expression involving $heap needs to be re-evaluated for each new
-# response.
+# Re-traverse bigcore's linked list, checking each chunk's address
+# against the executable.  Don't use gdb_test_multiple as want only
+# one pass/fail.  Don't use exp_continue as the regular expression
+# involving $heap needs to be re-evaluated for each new response.
 
 proc check_heap { dir heap } {
     global gdb_prompt
@@ -208,5 +110,133 @@ proc check_heap { dir heap } {
     }
 }
 
-check_heap next $next_heap
-check_heap prev $prev_heap
+# The bulk of the testcase.  DUMPER indicates who is supposed to dump
+# the core.  It can be either "kernel", or "gdb".
+proc test {dumper} {
+    global binfile timeout corefile gdb_prompt
+
+    # Run GDB on the bigcore program up-to where it will dump core.
+
+    clean_restart ${binfile}
+    gdb_test_no_output "set print sevenbit-strings"
+    gdb_test_no_output "set width 0"
+
+    # Get the core into the output directory.
+    set_inferior_cwd_to_output_dir
+
+    if {![runto_main]} {
+        return 0
+    }
+    set print_core_line [gdb_get_line_number "Dump core"]
+    gdb_test "tbreak $print_core_line"
+    gdb_test continue ".*print_string.*"
+    gdb_test next ".*0 = 0.*"
+
+    set next_heap [extract_heap next]
+    set prev_heap [extract_heap prev]
+
+    # Save the total allocated size within GDB so that we can check
+    # the core size later.
+    gdb_test_no_output "set \$bytes_allocated = bytes_allocated" \
+        "save heap size"
+
+    # Now create a core dump.
+
+    if {$dumper == "kernel"} {
+        # Rename the core file to "TESTFILE.corefile.$dumper" rather
+        # than just "core", to avoid problems with sys admin types
+        # that like to regularly prune all files named "core" from the
+        # system.
+
+        # Some systems append "core" to the name of the program;
+        # others append the name of the program to "core"; still
+        # others (like Linux, as of May 2003) create cores named
+        # "core.PID".
+
+        # Save the process ID.  Some systems dump the core into
+        # core.PID.
+        set inferior_pid [get_inferior_pid]
+
+        # Dump core using SIGABRT.
+        set oldtimeout $timeout
+        set timeout 600
+        gdb_test "signal SIGABRT" "Program terminated with signal SIGABRT, .*"
+        set timeout $oldtimeout
+
+        # Find the corefile.
+        set file [find_core_file $inferior_pid]
+        if { $file != "" } {
+            remote_exec build "mv $file $corefile.$dumper"
+        } else {
+            untested "can't generate a core file"
+            return 0
+        }
+    } elseif {$dumper == "gdb"} {
+        gdb_gcore_cmd "$corefile.$dumper" "gcore corefile"
+    } else {
+        error "unhandled dumper: $dumper"
+    }
+
+    # Check that the corefile is plausibly large enough.  We're trying
+    # to detect the case where the operating system has truncated the
+    # file just before signed wraparound.  TCL, unfortunately, has a
+    # similar problem - so use catch.  It can handle the "bad" size
+    # but not necessarily the "good" one.  And we must use GDB for the
+    # comparison, similarly.
+
+    if {[catch {file size $corefile.$dumper} core_size] == 0} {
+        set core_ok 0
+        gdb_test_multiple "print \$bytes_allocated < $core_size" \
+            "check core size" {
+            -re " = 1\r\n$gdb_prompt $" {
+                pass "check core size"
+                set core_ok 1
+            }
+            -re " = 0\r\n$gdb_prompt $" {
+                pass "check core size"
+                set core_ok 0
+            }
+        }
+    } {
+        # Probably failed due to the TCL build having problems with
+        # very large values.  Since GDB uses a 64-bit off_t (when
+        # possible) it shouldn't have this problem.  Assume that
+        # things are going to work.  Without this assumption the test
+        # is skiped on systems (such as i386 GNU/Linux with patched
+        # kernel) which do pass.
+        pass "check core size"
+        set core_ok 1
+    }
+    if {! $core_ok} {
+        untested "check core size (system does not support large corefiles)"
+        return 0
+    }
+
+    # Now load up that core file.
+
+    set test "load corefile"
+    gdb_test_multiple "core $corefile.$dumper" "$test" {
+        -re "A program is being debugged already.  Kill it. .y or n. " {
+            send_gdb "y\n"
+            exp_continue
+        }
+        -re "Core was generated by.*$gdb_prompt $" {
+            pass "$test"
+        }
+    }
+
+    # Finally, re-traverse bigcore's linked list, checking each
+    # chunk's address against the executable.
+
+    check_heap next $next_heap
+    check_heap prev $prev_heap
+}
+
+foreach_with_prefix dumper {kernel gdb} {
+    # GDB's gcore is too slow when testing with the extended-gdbserver
+    # board, since it requires reading all the inferior memory.
+    if {$dumper == "gdb" && [target_info gdb_protocol] != ""} {
+        continue
+    }
+    test $dumper
+}
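As an aside, the comment in sparse_bfd_set_section_contents above
mentions what a writer would need if it could not count on trailing
data being written: grow the file to its intended size so a final hole
is preserved.  A minimal sketch of such an "ftruncate-like call" in
plain POSIX terms (finish_sparse_file is a hypothetical name, not part
of the patch):

#include <unistd.h>

/* Extend FD to INTENDED_SIZE in case the final blocks were skipped as
   holes; ftruncate grows the file with a hole, so the apparent size
   becomes correct without allocating disk blocks.  */
static int
finish_sparse_file (int fd, off_t intended_size)
{
  return ftruncate (fd, intended_size);
}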