diff options
author | Andrew Burgess <aburgess@redhat.com> | 2025-02-14 11:51:41 +0000 |
---|---|---|
committer | Andrew Burgess <aburgess@redhat.com> | 2025-03-15 12:36:46 +0000 |
commit | 8bfe8a6bfdd45de43c626a12fd176750486a0759 (patch) | |
tree | f009f658a4f681d144a0c390a7ec3f23524f11a6 /gdb/testsuite/gdb.python | |
parent | c7d973ab6189290fc894529cec5db7f585074ab4 (diff) | |
download | binutils-8bfe8a6bfdd45de43c626a12fd176750486a0759.zip binutils-8bfe8a6bfdd45de43c626a12fd176750486a0759.tar.gz binutils-8bfe8a6bfdd45de43c626a12fd176750486a0759.tar.bz2 |
gdb/python: handle non-utf-8 character from gdb.execute()
I noticed that it was not possible to return a string containing
non-UTF-8 characters using gdb.execute(). For example, using the binary
from the gdb.python/py-source-styling.exp test:
(gdb) file ./gdb/testsuite/outputs/gdb.python/py-source-styling/py-source-styling
Reading symbols from ./gdb/testsuite/outputs/gdb.python/py-source-styling/py-source-styling...
(gdb) set style enabled off
(gdb) list 26
21 int some_variable = 1234;
22
23 /* The following line contains a character that is non-utf-8. This is a
24 critical part of the test as Python 3 can't convert this into a string
25 using its default mechanism. */
26 char c[] = "�"; /* List this line. */
27
28 return 0;
29 }
(gdb) python print(gdb.execute('list 26', to_string=True))
Python Exception <class 'UnicodeDecodeError'>: 'utf-8' codec can't decode byte 0xc0 in position 250: invalid start byte
Error occurred in Python: 'utf-8' codec can't decode byte 0xc0 in position 250: invalid start byte
It is necessary to disable styling before the initial 'list 26',
otherwise the source will be passed through GNU source highlight, and
GNU source highlight seems to be smart enough to figure out the
character encoding, and convert it to UTF-8. This conversion is then
cached in the source cache, and the later Python gdb.execute call will
get back a pure UTF-8 string.
If source styling is disabled, then GDB caches the string without the
conversion to UTF-8. The gdb.execute call then gets back a string
with a non-UTF-8 character within it, and Python throws an error
during its attempt to create a string object.
I'm not, at this point, proposing a solution that tries to guess the
source file encoding, though I guess such a thing could be done.
Instead, I think that, when creating the Python string, we should
make use of host_charset(), as set by the user with
'set host-charset ....'.
To do this, in execute_gdb_command, we should switch from
PyUnicode_FromString, which requires the input be a UTF-8 string, to
using PyUnicode_Decode, which allows GDB to specify the string
encoding. We will use host_charset().
With this done, it is now possible to list the file contents using
gdb.execute(), with the contents passing through a string:
(gdb) set host-charset ISO-8859-1
(gdb) python print(gdb.execute('list 26', to_string=True), end='')
21 int some_variable = 1234;
22
23 /* The following line contains a character that is non-utf-8. This is a
24 critical part of the test as Python 3 can't convert this into a string
25 using its default mechanism. */
26 char c[] = "À"; /* List this line. */
27
28 return 0;
29 }
(gdb)
There are already plenty of other places in GDB's Python code where we
use PyUnicode_Decode to create a string from something that might
contain user-generated content, so I believe this is the correct
approach.
Diffstat (limited to 'gdb/testsuite/gdb.python')
-rw-r--r-- | gdb/testsuite/gdb.python/py-source-styling.exp | 69 |
1 files changed, 55 insertions, 14 deletions
diff --git a/gdb/testsuite/gdb.python/py-source-styling.exp b/gdb/testsuite/gdb.python/py-source-styling.exp index a406771..ba7e795 100644 --- a/gdb/testsuite/gdb.python/py-source-styling.exp +++ b/gdb/testsuite/gdb.python/py-source-styling.exp @@ -13,9 +13,13 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -# Check that the Python pygments module can be used for source -# highlighting when GNU source highlight is not available (or is -# disabled, as is done in this test). +# Test related to source code highlighting and Python. Includes a +# test for using the Pygments module as a fall back to GNU source +# highlight. +# +# This script also includes tests for handling a non-uft-8 character +# with both Pygments highlighting, and with gdb.execute (when using +# the list command). require allow_python_tests @@ -23,14 +27,17 @@ load_lib gdb-python.exp standard_testfile -with_ansi_styling_terminal { - # We need an ANSI-capable terminal to get the output, additionally - # we need to set LC_ALL so GDB knows the terminal is UTF-8 - # capable, otherwise we'll get a UnicodeEncodeError trying to - # encode the output. - if { [prepare_for_testing "failed to prepare" ${testfile} ${srcfile}] } { - return - } +if { [build_executable "failed to build" ${testfile} ${srcfile}] == -1 } { + return +} + +set line_number [gdb_get_line_number "List this line."] + +# Check that the Python pygments module can be used for source +# highlighting when GNU source highlight is not available (or is +# disabled, as is done in this test). +proc test_pygments_styling {} { + clean_restart $::binfile if { ![gdb_py_module_available "pygments"] } { unsupported "pygments module not available" @@ -46,8 +53,7 @@ with_ansi_styling_terminal { gdb_test "maint flush source-cache" "Source cache flushed\\." 
set seen_style_escape false - set line_number [gdb_get_line_number "List this line."] - gdb_test_multiple "list ${line_number}" "" { + gdb_test_multiple "list $::line_number" "" { -re "Python Exception.*" { fail $gdb_test_name } @@ -55,8 +61,43 @@ with_ansi_styling_terminal { set seen_style_escape true exp_continue } - -re "$gdb_prompt $" { + -re "$::gdb_prompt $" { gdb_assert { $seen_style_escape } $gdb_test_name } } } + +# Use gdb.execute to list source code containing non-utf-8 character. +# Check that initially GDB fails to convert the source code to a +# string, then set the correct host encoding, and try again. This +# time the conversion should succeed. +proc test_gdb_execute_non_utf8_source {} { + clean_restart $::binfile + + # The default host charset is utf-8, the source code contains a + # non-utf-8 character, so this will fail. + gdb_test \ + "python source = gdb.execute('list $::line_number', True, True)" \ + [multi_line \ + "Python Exception <class 'UnicodeDecodeError'>: 'ascii' codec can't decode byte 0xc0 in position 250: ordinal not in range\\(128\\)" \ + "Error occurred in Python: 'ascii' codec can't decode byte 0xc0 in position 250: ordinal not in range\\(128\\)"] \ + "gdb.execute fails to convert result to string" + + # Set the correct host charset, and try the conversion again. + gdb_test_no_output "set host-charset ISO-8859-1" + gdb_test_no_output \ + "python source = gdb.execute('list $::line_number', True, True)" \ + "gdb.execute does convert result to string" + + # Check that we captured something that looks like the expected source. + gdb_test "python print(source)" ".*List this line.*" +} + +# We need an ANSI-capable terminal to get the output, additionally we +# need to set LC_ALL so GDB knows the terminal is UTF-8 capable, +# otherwise we'll get a UnicodeEncodeError trying to encode the +# output. +with_ansi_styling_terminal { + test_pygments_styling + test_gdb_execute_non_utf8_source +} |