diff options
-rw-r--r-- | gdb/Makefile.in | 1 | ||||
-rw-r--r-- | gdb/NEWS | 34 | ||||
-rw-r--r-- | gdb/data-directory/Makefile.in | 1 | ||||
-rw-r--r-- | gdb/doc/gdb.texinfo | 45 | ||||
-rw-r--r-- | gdb/doc/python.texi | 328 | ||||
-rw-r--r-- | gdb/python/lib/gdb/disassembler.py | 178 | ||||
-rw-r--r-- | gdb/python/py-disasm.c | 1090 | ||||
-rw-r--r-- | gdb/python/python-internal.h | 23 | ||||
-rw-r--r-- | gdb/python/python.c | 3 | ||||
-rw-r--r-- | gdb/testsuite/gdb.python/py-disasm.c | 25 | ||||
-rw-r--r-- | gdb/testsuite/gdb.python/py-disasm.exp | 209 | ||||
-rw-r--r-- | gdb/testsuite/gdb.python/py-disasm.py | 712 |
12 files changed, 2648 insertions, 1 deletions
diff --git a/gdb/Makefile.in b/gdb/Makefile.in index d800877..911daa2 100644 --- a/gdb/Makefile.in +++ b/gdb/Makefile.in @@ -393,6 +393,7 @@ SUBDIR_PYTHON_SRCS = \ python/py-cmd.c \ python/py-connection.c \ python/py-continueevent.c \ + python/py-disasm.c \ python/py-event.c \ python/py-evtregistry.c \ python/py-evts.c \ @@ -63,6 +63,40 @@ maintenance info line-table ** New method gdb.Frame.language that returns the name of the frame's language. + ** New Python API for wrapping GDB's disassembler: + + - gdb.disassembler.register_disassembler(DISASSEMBLER, ARCH). + DISASSEMBLER is a sub-class of gdb.disassembler.Disassembler. + ARCH is either None or a string containing a bfd architecture + name. DISASSEMBLER is registered as a disassembler for + architecture ARCH, or for all architectures if ARCH is None. + The previous disassembler registered for ARCH is returned, this + can be None if no previous disassembler was registered. + + - gdb.disassembler.Disassembler is the class from which all + disassemblers should inherit. Its constructor takes a string, + a name for the disassembler, which is currently only used in + some debug output. Sub-classes should override the __call__ + method to perform disassembly, invoking __call__ on this base + class will raise an exception. + + - gdb.disassembler.DisassembleInfo is the class used to describe + a single disassembly request from GDB. An instance of this + class is passed to the __call__ method of + gdb.disassembler.Disassembler and has the following read-only + attributes: 'address', and 'architecture', as well as the + following method: 'read_memory'. + + - gdb.disassembler.builtin_disassemble(INFO, MEMORY_SOURCE), + calls GDB's builtin disassembler on INFO, which is a + gdb.disassembler.DisassembleInfo object. MEMORY_SOURCE is + optional, its default value is None. If MEMORY_SOURCE is not + None then it must be an object that has a 'read_memory' method. 
+ + - gdb.disassembler.DisassemblerResult is a class that can be used + to wrap the result of a call to a Disassembler. It has + read-only attributes 'length' and 'string'. + *** Changes in GDB 12 * DBX mode is deprecated, and will be removed in GDB 13 diff --git a/gdb/data-directory/Makefile.in b/gdb/data-directory/Makefile.in index b606fc6..cf5226f 100644 --- a/gdb/data-directory/Makefile.in +++ b/gdb/data-directory/Makefile.in @@ -69,6 +69,7 @@ PYTHON_DIR = python PYTHON_INSTALL_DIR = $(DESTDIR)$(GDB_DATADIR)/$(PYTHON_DIR) PYTHON_FILE_LIST = \ gdb/__init__.py \ + gdb/disassembler.py \ gdb/FrameDecorator.py \ gdb/FrameIterator.py \ gdb/frames.py \ diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo index 3a8cf3f..2178b47 100644 --- a/gdb/doc/gdb.texinfo +++ b/gdb/doc/gdb.texinfo @@ -39680,6 +39680,51 @@ packet history. @item maint info jit Print information about JIT code objects loaded in the current inferior. +@anchor{maint info python-disassemblers} +@kindex maint info python-disassemblers +@item maint info python-disassemblers +This command is defined within the @code{gdb.disassembler} Python +module (@pxref{Disassembly In Python}), and will only be present after +that module has been imported. To force the module to be imported do +the following: + +@smallexample +(@value{GDBP}) python import gdb.disassembler +@end smallexample + +This command lists all the architectures for which a disassembler is +currently registered, and the name of the disassembler. If a +disassembler is registered for all architectures, then this is listed +last against the @samp{GLOBAL} architecture. + +If one of the disassemblers would be selected for the architecture of +the current inferior, then this disassembler will be marked. + +The following example shows a situation in which two disassemblers are +registered, initially the @samp{i386} disassembler matches the current +architecture, then the architecture is changed, now the @samp{GLOBAL} +disassembler matches. 
+ +@smallexample +@group +(@value{GDBP}) show architecture +The target architecture is set to "auto" (currently "i386"). +(@value{GDBP}) maint info python-disassemblers +Architecture Disassember Name +i386 Disassembler_1 (Matches current architecture) +GLOBAL Disassembler_2 +@end group +@group +(@value{GDBP}) set architecture arm +The target architecture is set to "arm". +(@value{GDBP}) maint info python-disassemblers +Architecture Disassember Name +i386 Disassembler_1 +GLOBAL Disassembler_2 (Matches current architecture) +@end group +@end smallexample + @kindex set displaced-stepping @kindex show displaced-stepping @cindex displaced stepping support diff --git a/gdb/doc/python.texi b/gdb/doc/python.texi index aaf7666..75804ef 100644 --- a/gdb/doc/python.texi +++ b/gdb/doc/python.texi @@ -222,6 +222,7 @@ optional arguments while skipping others. Example: * Registers In Python:: Python representation of registers. * Connections In Python:: Python representation of connections. * TUI Windows In Python:: Implementing new TUI windows. +* Disassembly In Python:: Instruction Disassembly In Python @end menu @node Basic Python @@ -599,6 +600,7 @@ such as those used by readline for command input, and annotation related prompts are prohibited from being changed. @end defun +@anchor{gdb_architecture_names} @defun gdb.architecture_names () Return a list containing all of the architecture names that the current build of @value{GDBN} supports. Each architecture name is a @@ -3287,6 +3289,7 @@ single address space, so this may not match the architecture of a particular frame (@pxref{Frames In Python}). @end defun +@anchor{gdbpy_inferior_read_memory} @findex Inferior.read_memory @defun Inferior.read_memory (address, length) Read @var{length} addressable memory units from the inferior, starting at @@ -6575,6 +6578,331 @@ corner), and @var{button} specifies which mouse button was used, whose values can be 1 (left), 2 (middle), or 3 (right).
@end defun +@node Disassembly In Python +@subsubsection Instruction Disassembly In Python +@cindex python instruction disassembly + +@value{GDBN}'s builtin disassembler can be extended, or even replaced, +using the Python API. The disassembler related features are contained +within the @code{gdb.disassembler} module: + +@deftp {class} gdb.disassembler.DisassembleInfo +Disassembly is driven by instances of this class. Each time +@value{GDBN} needs to disassemble an instruction, an instance of this +class is created and passed to a registered disassembler. The +disassembler is then responsible for disassembling an instruction and +returning a result. + +Instances of this type are usually created within @value{GDBN}, +however, it is possible to create a copy of an instance of this type, +see the description of @code{__init__} for more details. + +This class has the following properties and methods: + +@defvar DisassembleInfo.address +A read-only integer containing the address at which @value{GDBN} +wishes to disassemble a single instruction. +@end defvar + +@defvar DisassembleInfo.architecture +The @code{gdb.Architecture} (@pxref{Architectures In Python}) for +which @value{GDBN} is currently disassembling, this property is +read-only. +@end defvar + +@defvar DisassembleInfo.progspace +The @code{gdb.Progspace} (@pxref{Progspaces In Python,,Program Spaces +In Python}) for which @value{GDBN} is currently disassembling, this +property is read-only. +@end defvar + +@defun DisassembleInfo.is_valid () +Returns @code{True} if the @code{DisassembleInfo} object is valid, +@code{False} if not. A @code{DisassembleInfo} object will become +invalid once the disassembly call for which the @code{DisassembleInfo} +was created, has returned. Calling other @code{DisassembleInfo} +methods, or accessing @code{DisassembleInfo} properties, will raise a +@code{RuntimeError} exception if it is invalid. 
+@end defun + +@defun DisassembleInfo.__init__ (info) +This can be used to create a new @code{DisassembleInfo} object that is +a copy of @var{info}. The copy will have the same @code{address}, +@code{architecture}, and @code{progspace} values as @var{info}, and +will become invalid at the same time as @var{info}. + +This method exists so that sub-classes of @code{DisassembleInfo} can +be created, these sub-classes must be initialized as copies of an +existing @code{DisassembleInfo} object, but sub-classes might choose +to override the @code{read_memory} method, and so control what +@value{GDBN} sees when reading from memory +(@pxref{builtin_disassemble}). +@end defun + +@defun DisassembleInfo.read_memory (length, offset) +This method allows the disassembler to read the bytes of the +instruction to be disassembled. The method reads @var{length} bytes, +starting at @var{offset} from +@code{DisassembleInfo.address}. + +It is important that the disassembler read the instruction bytes using +this method, rather than reading inferior memory directly, as in some +cases @value{GDBN} disassembles from an internal buffer rather than +directly from inferior memory, calling this method handles this +detail. + +Returns a buffer object, which behaves much like an array or a string, +just as @code{Inferior.read_memory} does +(@pxref{gdbpy_inferior_read_memory,,Inferior.read_memory}). The +length of the returned buffer will always be exactly @var{length}. + +If @value{GDBN} is unable to read the required memory then a +@code{gdb.MemoryError} exception is raised (@pxref{Exception +Handling}). + +This method can be overridden by a sub-class in order to control what +@value{GDBN} sees when reading from memory +(@pxref{builtin_disassemble}). When overriding this method it is +important to understand how @code{builtin_disassemble} makes use of +this method. 
+ +While disassembling a single instruction there could be multiple calls +to this method, and the same bytes might be read multiple times. Any +single call might only read a subset of the total instruction bytes. + +If an implementation of @code{read_memory} is unable to read the +requested memory contents, for example, if there's a request to read +from an invalid memory address, then a @code{gdb.MemoryError} should +be raised. + +Raising a @code{MemoryError} inside @code{read_memory} does not +automatically mean a @code{MemoryError} will be raised by +@code{builtin_disassemble}. It is possible that @value{GDBN}'s builtin +disassembler is probing to see how many bytes are available. When +@code{read_memory} raises the @code{MemoryError} the builtin +disassembler might be able to perform a complete disassembly with the +bytes it has available, in this case @code{builtin_disassemble} will +not itself raise a @code{MemoryError}. + +Any other exception type raised in @code{read_memory} will propagate +back and be re-raised by @code{builtin_disassemble}. +@end defun +@end deftp + +@deftp {class} Disassembler +This is a base class from which all user implemented disassemblers +must inherit. + +@defun Disassembler.__init__ (name) +The constructor takes @var{name}, a string, which should be a short +name for this disassembler. +@end defun + +@defun Disassembler.__call__ (info) +The @code{__call__} method must be overridden by sub-classes to +perform disassembly. Calling @code{__call__} on this base class will +raise a @code{NotImplementedError} exception. + +The @var{info} argument is an instance of @code{DisassembleInfo}, and +describes the instruction that @value{GDBN} wants disassembling. + +If this function returns @code{None}, this indicates to @value{GDBN} +that this sub-class doesn't wish to disassemble the requested +instruction. @value{GDBN} will then use its builtin disassembler to +perform the disassembly.
+ +Alternatively, this function can return a @code{DisassemblerResult} +that represents the disassembled instruction, this type is described +in more detail below. + +The @code{__call__} method can raise a @code{gdb.MemoryError} +exception (@pxref{Exception Handling}) to indicate to @value{GDBN} +that there was a problem accessing the required memory, this will then +be displayed by @value{GDBN} within the disassembler output. + +Ideally, the only three outcomes from invoking @code{__call__} would +be a return of @code{None}, a successful disassembly returned in a +@code{DisassemblerResult}, or a @code{MemoryError} indicating that +there was a problem reading memory. + +However, as an implementation of @code{__call__} could fail due to +other reasons, e.g.@: some external resource required to perform +disassembly is temporarily unavailable, then, if @code{__call__} +raises a @code{GdbError}, the exception will be converted to a string +and printed at the end of the disassembly output, the disassembly +request will then stop. + +Any other exception type raised by the @code{__call__} method is +considered an error in the user code, the exception will be printed to +the error stream according to the @kbd{set python print-stack} setting +(@pxref{set_python_print_stack,,@kbd{set python print-stack}}). +@end defun +@end deftp + +@deftp {class} DisassemblerResult +This class is used to hold the result of calling +@w{@code{Disassembler.__call__}}, and represents a single disassembled +instruction. This class has the following properties and methods: + +@defun DisassemblerResult.__init__ (@var{length}, @var{string}) +Initialize an instance of this class, @var{length} is the length of +the disassembled instruction in bytes, which must be greater than +zero, and @var{string} is a non-empty string that represents the +disassembled instruction. 
+@end defun + +@defvar DisassemblerResult.length +A read-only property containing the length of the disassembled +instruction in bytes, this will always be greater than zero. +@end defvar + +@defvar DisassemblerResult.string +A read-only property containing a non-empty string representing the +disassembled instruction. +@end defvar +@end deftp + +The following functions are also contained in the +@code{gdb.disassembler} module: + +@defun register_disassembler (disassembler, architecture) +The @var{disassembler} must be a sub-class of +@code{gdb.disassembler.Disassembler} or @code{None}. + +The optional @var{architecture} is either a string, or the value +@code{None}. If it is a string, then it should be the name of an +architecture known to @value{GDBN}, as returned either from +@code{gdb.Architecture.name} +(@pxref{gdbpy_architecture_name,,gdb.Architecture.name}), or from +@code{gdb.architecture_names} +(@pxref{gdb_architecture_names,,gdb.architecture_names}). + +The @var{disassembler} will be installed for the architecture named by +@var{architecture}, or if @var{architecture} is @code{None}, then +@var{disassembler} will be installed as a global disassembler for use +by all architectures. + +@cindex disassembler in Python, global vs.@: specific +@cindex search order for disassembler in Python +@cindex look up of disassembler in Python +@value{GDBN} only records a single disassembler for each architecture, +and a single global disassembler. Calling +@code{register_disassembler} for an architecture, or for the global +disassembler, will replace any existing disassembler registered for +that @var{architecture} value. The previous disassembler is returned. + +If @var{disassembler} is @code{None} then any disassembler currently +registered for @var{architecture} is deregistered and returned. + +When @value{GDBN} is looking for a disassembler to use, @value{GDBN} +first looks for an architecture specific disassembler. 
If none has +been registered then @value{GDBN} looks for a global disassembler (one +registered with @var{architecture} set to @code{None}). Only one +disassembler is called to perform disassembly, so, if there is both an +architecture specific disassembler, and a global disassembler +registered, it is the architecture specific disassembler that will be +used. + +@value{GDBN} tracks the architecture specific, and global +disassemblers separately, so it doesn't matter in which order +disassemblers are created or registered; an architecture specific +disassembler, if present, will always be used in preference to a +global disassembler. + +You can use the @kbd{maint info python-disassemblers} command +(@pxref{maint info python-disassemblers}) to see which disassemblers +have been registered. +@end defun + +@anchor{builtin_disassemble} +@defun builtin_disassemble (info) +This function calls back into @value{GDBN}'s builtin disassembler to +disassemble the instruction identified by @var{info}, an instance, or +sub-class, of @code{DisassembleInfo}. + +When the builtin disassembler needs to read memory the +@code{read_memory} method on @var{info} will be called. By +sub-classing @code{DisassembleInfo} and overriding the +@code{read_memory} method, it is possible to intercept calls to +@code{read_memory} from the builtin disassembler, and to modify the +values returned. + +It is important to understand that, even when +@code{DisassembleInfo.read_memory} raises a @code{gdb.MemoryError}, it +is the internal disassembler itself that reports the memory error to +@value{GDBN}. The reason for this is that the disassembler might +probe memory to see if a byte is readable or not; if the byte can't be +read then the disassembler may choose not to report an error, but +instead to disassemble the bytes that it does have available. 
+ +If the builtin disassembler is successful then an instance of +@code{DisassemblerResult} is returned from @code{builtin_disassemble}, +alternatively, if something goes wrong, an exception will be raised. + +A @code{MemoryError} will be raised if @code{builtin_disassemble} is +unable to read some memory that is required in order to perform +disassembly correctly. + +Any exception that is not a @code{MemoryError}, that is raised in a +call to @code{read_memory}, will pass through +@code{builtin_disassemble}, and be visible to the caller. + +Finally, there are a few cases where @value{GDBN}'s builtin +disassembler can fail for reasons that are not covered by +@code{MemoryError}. In these cases, a @code{GdbError} will be raised. +The contents of the exception will be a string describing the problem +the disassembler encountered. +@end defun + +Here is an example that registers a global disassembler. The new +disassembler invokes the builtin disassembler, and then adds a +comment, @code{## Comment}, to each line of disassembly output: + +@smallexample +class ExampleDisassembler(gdb.disassembler.Disassembler): + def __init__(self): + super().__init__("ExampleDisassembler") + + def __call__(self, info): + result = gdb.disassembler.builtin_disassemble(info) + length = result.length + text = result.string + "\t## Comment" + return gdb.disassembler.DisassemblerResult(length, text) + +gdb.disassembler.register_disassembler(ExampleDisassembler()) +@end smallexample + +The following example creates a sub-class of @code{DisassembleInfo} in +order to intercept the @code{read_memory} calls, within +@code{read_memory} any bytes read from memory have the two 4-bit +nibbles swapped around. This isn't a very useful adjustment, but +serves as an example. 
+ +@smallexample +class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + buffer = super().read_memory(length, offset) + result = bytearray() + for b in buffer: + v = int.from_bytes(b, 'little') + v = (v << 4) & 0xf0 | (v >> 4) + result.append(v) + return memoryview(result) + +class NibbleSwapDisassembler(gdb.disassembler.Disassembler): + def __init__(self): + super().__init__("NibbleSwapDisassembler") + + def __call__(self, info): + info = MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + +gdb.disassembler.register_disassembler(NibbleSwapDisassembler()) +@end smallexample + @node Python Auto-loading @subsection Python Auto-loading @cindex Python auto-loading diff --git a/gdb/python/lib/gdb/disassembler.py b/gdb/python/lib/gdb/disassembler.py new file mode 100644 index 0000000..5a2d94a --- /dev/null +++ b/gdb/python/lib/gdb/disassembler.py @@ -0,0 +1,178 @@ +# Copyright (C) 2021-2022 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""Disassembler related module.""" + +import gdb +import _gdb.disassembler + +# Re-export everything from the _gdb.disassembler module, which is +# defined within GDB's C++ code. +from _gdb.disassembler import * + +# Module global dictionary of gdb.disassembler.Disassembler objects. 
+# The keys of this dictionary are bfd architecture names, or the +# special value None. +# +# When a request to disassemble comes in we first lookup the bfd +# architecture name from the gdbarch, if that name exists in this +# dictionary then we use that Disassembler object. +# +# If there's no architecture specific disassembler then we look for +# the key None in this dictionary, and if that key exists, we use that +# disassembler. +# +# If none of the above checks found a suitable disassembler, then no +# disassembly is performed in Python. +_disassemblers_dict = {} + + +class Disassembler(object): + """A base class from which all user implemented disassemblers must + inherit.""" + + def __init__(self, name): + """Constructor. Takes a name, which should be a string, which can be + used to identify this disassembler in diagnostic messages.""" + self.name = name + + def __call__(self, info): + """A default implementation of __call__. All sub-classes must + override this method. Calling this default implementation will throw + a NotImplementedError exception.""" + raise NotImplementedError("Disassembler.__call__") + + +def register_disassembler(disassembler, architecture=None): + """Register a disassembler. DISASSEMBLER is a sub-class of + gdb.disassembler.Disassembler. ARCHITECTURE is either None or a + string, the name of an architecture known to GDB. + + DISASSEMBLER is registered as a disassembler for ARCHITECTURE, or + all architectures when ARCHITECTURE is None. + + Returns the previous disassembler registered with this + ARCHITECTURE value. 
+ """ + + if not isinstance(disassembler, Disassembler) and disassembler is not None: + raise TypeError("disassembler should sub-class gdb.disassembler.Disassembler") + + old = None + if architecture in _disassemblers_dict: + old = _disassemblers_dict[architecture] + del _disassemblers_dict[architecture] + if disassembler is not None: + _disassemblers_dict[architecture] = disassembler + + # Call the private _set_enabled function within the + # _gdb.disassembler module. This function sets a global flag + # within GDB's C++ code that enables or disables the Python + # disassembler functionality, this improves performance of the + # disassembler by avoiding unneeded calls into Python when we know + # that no disassemblers are registered. + _gdb.disassembler._set_enabled(len(_disassemblers_dict) > 0) + return old + + +def _print_insn(info): + """This function is called by GDB when it wants to disassemble an + instruction. INFO describes the instruction to be + disassembled.""" + + def lookup_disassembler(arch): + try: + name = arch.name() + if name is None: + return None + if name in _disassemblers_dict: + return _disassemblers_dict[name] + if None in _disassemblers_dict: + return _disassemblers_dict[None] + return None + except: + # It's pretty unlikely this exception case will ever + # trigger, one situation would be if the user somehow + # corrupted the _disassemblers_dict variable such that it + # was no longer a dictionary. + return None + + disassembler = lookup_disassembler(info.architecture) + if disassembler is None: + return None + return disassembler(info) + + +class maint_info_py_disassemblers_cmd(gdb.Command): + """ + List all registered Python disassemblers. + + List the name of all registered Python disassemblers, next to the + name of the architecture for which the disassembler is registered. + + The global Python disassembler is listed next to the string + 'GLOBAL'.
+ + The disassembler that matches the architecture of the currently + selected inferior will be marked, this is an indication of which + disassembler will be invoked if any disassembly is performed in + the current inferior. + """ + + def __init__(self): + super().__init__("maintenance info python-disassemblers", gdb.COMMAND_USER) + + def invoke(self, args, from_tty): + # If no disassemblers are registered, tell the user. + if len(_disassemblers_dict) == 0: + print("No Python disassemblers registered.") + return + + # Figure out the longest architecture name, so we can + # correctly format the table of results. + longest_arch_name = 0 + for architecture in _disassemblers_dict: + if architecture is not None: + name = _disassemblers_dict[architecture].name + if len(name) > longest_arch_name: + longest_arch_name = len(name) + + # Figure out the name of the current architecture. There + # should always be a current inferior, but if, somehow, there + # isn't, then leave curr_arch as the empty string, which will + # not then match against any architecture in the dictionary. + curr_arch = "" + if gdb.selected_inferior() is not None: + curr_arch = gdb.selected_inferior().architecture().name() + + # Now print the dictionary of registered disassemblers out to + # the user.
+ match_tag = "\t(Matches current architecture)" + fmt_len = max(longest_arch_name, len("Architecture")) + format_string = "{:" + str(fmt_len) + "s} {:s}" + print(format_string.format("Architecture", "Disassember Name")) + for architecture in _disassemblers_dict: + if architecture is not None: + name = _disassemblers_dict[architecture].name + if architecture == curr_arch: + name += match_tag + match_tag = "" + print(format_string.format(architecture, name)) + if None in _disassemblers_dict: + name = _disassemblers_dict[None].name + match_tag + print(format_string.format("GLOBAL", name)) + + +maint_info_py_disassemblers_cmd() diff --git a/gdb/python/py-disasm.c b/gdb/python/py-disasm.c new file mode 100644 index 0000000..4c78ca3 --- /dev/null +++ b/gdb/python/py-disasm.c @@ -0,0 +1,1090 @@ +/* Python interface to instruction disassembly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "defs.h" +#include "python-internal.h" +#include "dis-asm.h" +#include "arch-utils.h" +#include "charset.h" +#include "disasm.h" +#include "progspace.h" + +/* Implement gdb.disassembler.DisassembleInfo type. An object of this type + represents a single disassembler request from GDB. */ + +struct disasm_info_object +{ + PyObject_HEAD + + /* The architecture in which we are disassembling. 
*/ + struct gdbarch *gdbarch; + + /* The program_space in which we are disassembling. */ + struct program_space *program_space; + + /* Address of the instruction to disassemble. */ + bfd_vma address; + + /* The disassemble_info passed from core GDB, this contains the + callbacks necessary to read the instruction from core GDB, and to + print the disassembled instruction. */ + disassemble_info *gdb_info; + + /* If copies of this object are created then they are chained together + via this NEXT pointer, this allows all the copies to be invalidated at + the same time as the parent object. */ + struct disasm_info_object *next; +}; + +extern PyTypeObject disasm_info_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_info_object"); + +/* Implement gdb.disassembler.DisassemblerResult type, an object that holds + the result of calling the disassembler. This is mostly the length of + the disassembled instruction (in bytes), and the string representing the + disassembled instruction. */ + +struct disasm_result_object +{ + PyObject_HEAD + + /* The length of the disassembled instruction in bytes. */ + int length; + + /* A buffer which, when allocated, holds the disassembled content of an + instruction. */ + string_file *content; +}; + +extern PyTypeObject disasm_result_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_result_object"); + +/* When this is false we fast path out of gdbpy_print_insn, which should + keep the performance impact of the Python disassembler down. This is + set to true from Python by calling gdb.disassembler._set_enabled() when + the user registers a disassembler. */ + +static bool python_print_insn_enabled = false; + +/* A sub-class of gdb_disassembler that holds a pointer to a Python + DisassembleInfo object. A pointer to an instance of this class is + placed in the application_data field of the disassemble_info that is + used when we call gdbarch_print_insn. 
*/ + +struct gdbpy_disassembler : public gdb_printing_disassembler +{ + /* Constructor. */ + gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source); + + /* Get the DisassembleInfo object pointer. */ + disasm_info_object * + py_disasm_info () const + { + return m_disasm_info_object; + } + + /* Callbacks used by disassemble_info. */ + static void memory_error_func (int status, bfd_vma memaddr, + struct disassemble_info *info); + static void print_address_func (bfd_vma addr, + struct disassemble_info *info); + static int read_memory_func (bfd_vma memaddr, gdb_byte *buff, + unsigned int len, + struct disassemble_info *info); + + /* Return a reference to an optional that contains the address at which a + memory error occurred. The optional will only have a value if a + memory error actually occurred. */ + const gdb::optional<CORE_ADDR> &memory_error_address () const + { return m_memory_error_address; } + + /* Return the content of the disassembler as a string. The contents are + moved out of the disassembler, so after this call the disassembler + contents have been reset back to empty. */ + std::string release () + { + return m_string_file.release (); + } + +private: + + /* Where the disassembler result is written. */ + string_file m_string_file; + + /* The DisassembleInfo object we are disassembling for. */ + disasm_info_object *m_disasm_info_object; + + /* When the user indicates that a memory error has occurred then the + address of the memory error is stored in here. */ + gdb::optional<CORE_ADDR> m_memory_error_address; + + /* When the user calls the builtin_disassemble function, if they pass a + memory source object then a pointer to the object is placed in here, + otherwise, this field is nullptr. */ + PyObject *m_memory_source; +}; + +/* Return true if OBJ is still valid, otherwise, return false. A valid OBJ + will have a non-nullptr gdb_info field. 
*/ + +static bool +disasm_info_object_is_valid (disasm_info_object *obj) +{ + return obj->gdb_info != nullptr; +} + +/* Fill in OBJ with all the other arguments. */ + +static void +disasm_info_fill (disasm_info_object *obj, struct gdbarch *gdbarch, + program_space *progspace, bfd_vma address, + disassemble_info *di, disasm_info_object *next) +{ + obj->gdbarch = gdbarch; + obj->program_space = progspace; + obj->address = address; + obj->gdb_info = di; + obj->next = next; +} + +/* Implement DisassembleInfo.__init__. Takes a single argument that must + be another DisassembleInfo object and copies the contents from the + argument into this new object. */ + +static int +disasm_info_init (PyObject *self, PyObject *args, PyObject *kwargs) +{ + static const char *keywords[] = { "info", NULL }; + PyObject *info_obj; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O!", keywords, + &disasm_info_object_type, + &info_obj)) + return -1; + + disasm_info_object *other = (disasm_info_object *) info_obj; + disasm_info_object *info = (disasm_info_object *) self; + disasm_info_fill (info, other->gdbarch, other->program_space, + other->address, other->gdb_info, other->next); + other->next = info; + + /* As the OTHER object now holds a pointer to INFO we inc the ref count + on INFO. This stops INFO being deleted until OTHER has gone away. */ + Py_INCREF ((PyObject *) info); + return 0; +} + +/* The tp_dealloc callback for the DisassembleInfo type. */ + +static void +disasm_info_dealloc (PyObject *self) +{ + disasm_info_object *obj = (disasm_info_object *) self; + + /* We no longer care about the object our NEXT pointer points at, so we + can decrement its reference count. This macro handles the case when + NEXT is nullptr. */ + Py_XDECREF ((PyObject *) obj->next); + + /* Now core deallocation behaviour. */ + Py_TYPE (self)->tp_free (self); +} + +/* Implement DisassembleInfo.is_valid(), really just a wrapper around the + disasm_info_object_is_valid function above. 
*/ + +static PyObject * +disasmpy_info_is_valid (PyObject *self, PyObject *args) +{ + disasm_info_object *disasm_obj = (disasm_info_object *) self; + + if (disasm_info_object_is_valid (disasm_obj)) + Py_RETURN_TRUE; + + Py_RETURN_FALSE; +} + +/* Set the Python exception to be a gdb.MemoryError object, with ADDRESS + as its payload. */ + +static void +disasmpy_set_memory_error_for_address (CORE_ADDR address) +{ + /* PyErr_SetObject takes its own reference to the value, so keep ours in + a gdbpy_ref, which releases it again when this function returns. */ + gdbpy_ref<> address_obj = gdb_py_object_from_longest (address); + PyErr_SetObject (gdbpy_gdb_memory_error, address_obj.get ()); +} + +/* Ensure that a gdb.disassembler.DisassembleInfo is valid. */ + +#define DISASMPY_DISASM_INFO_REQUIRE_VALID(Info) \ + do { \ + if (!disasm_info_object_is_valid (Info)) \ + { \ + PyErr_SetString (PyExc_RuntimeError, \ + _("DisassembleInfo is no longer valid.")); \ + return nullptr; \ + } \ + } while (0) + +/* Initialise OBJ, a DisassemblerResult object with LENGTH and CONTENT. + OBJ might already have been initialised, in which case any existing + content should be discarded before the new CONTENT is moved in. */ + +static void +disasmpy_init_disassembler_result (disasm_result_object *obj, int length, + std::string content) +{ + if (obj->content == nullptr) + obj->content = new string_file; + else + obj->content->clear (); + + obj->length = length; + *(obj->content) = std::move (content); +} + +/* Implement gdb.disassembler.builtin_disassemble(). Calls back into GDB's + builtin disassembler. The first argument is a DisassembleInfo object + describing what to disassemble. The second argument is optional and + provides a mechanism to modify the memory contents that the builtin + disassembler will actually disassemble. + + Returns an instance of gdb.disassembler.DisassemblerResult, an object + that wraps a disassembled instruction, or it raises a + gdb.MemoryError. 
*/ + +static PyObject * +disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw) +{ + PyObject *info_obj, *memory_source_obj = nullptr; + static const char *keywords[] = { "info", "memory_source", nullptr }; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O!|O", keywords, + &disasm_info_object_type, &info_obj, + &memory_source_obj)) + return nullptr; + + disasm_info_object *disasm_info = (disasm_info_object *) info_obj; + DISASMPY_DISASM_INFO_REQUIRE_VALID (disasm_info); + + /* Where the result will be written. */ + gdbpy_disassembler disassembler (disasm_info, memory_source_obj); + + /* Now actually perform the disassembly. LENGTH is set to the length of + the disassembled instruction, or -1 if there was a memory-error + encountered while disassembling. See below for more details on + handling of -1 return value. */ + int length; + try + { + length = gdbarch_print_insn (disasm_info->gdbarch, disasm_info->address, + disassembler.disasm_info ()); + } + catch (gdbpy_err_fetch &pyerr) + { + /* Reinstall the Python exception held in PYERR. This clears the + pointers held in PYERR, hence the need to catch as a non-const + reference. */ + pyerr.restore (); + return nullptr; + } + + if (length == -1) + { + + /* In an ideal world, every disassembler should always call the + memory error function before returning a status of -1 as the only + error a disassembler should encounter is a failure to read + memory. Unfortunately, there are some disassemblers who don't + follow this rule, and will return -1 without calling the memory + error function. + + To make the Python API simpler, we just classify everything as a + memory error, but the message has to be modified for the case + where the disassembler didn't call the memory error function. 
*/ + if (disassembler.memory_error_address ().has_value ()) + { + CORE_ADDR addr = *disassembler.memory_error_address (); + disasmpy_set_memory_error_for_address (addr); + } + else + { + /* No memory error address was recorded, so raise + gdbpy_gdberror_exc, using whatever the disassembler printed + as the message, or a generic message if it printed nothing. */ + std::string content = disassembler.release (); + if (!content.empty ()) + PyErr_SetString (gdbpy_gdberror_exc, content.c_str ()); + else + PyErr_SetString (gdbpy_gdberror_exc, + _("Unknown disassembly error.")); + } + return nullptr; + } + + /* Instructions are either non-zero in length, or we got an error, + indicated by a length of -1, which we handled above. */ + gdb_assert (length > 0); + + /* We should not have seen a memory error in this case. */ + gdb_assert (!disassembler.memory_error_address ().has_value ()); + + /* Create a DisassemblerResult containing the results. */ + std::string content = disassembler.release (); + PyTypeObject *type = &disasm_result_object_type; + gdbpy_ref<disasm_result_object> res + ((disasm_result_object *) type->tp_alloc (type, 0)); + disasmpy_init_disassembler_result (res.get (), length, std::move (content)); + return reinterpret_cast<PyObject *> (res.release ()); +} + +/* Implement the _gdb.disassembler._set_enabled function. Takes a boolean + parameter, and sets whether GDB should enter the Python disassembler + code or not. + + This is called from within the Python code when a new disassembler is + registered. When no disassemblers are registered the global C++ flag + is set to false, and GDB never even enters the Python environment to + check for a disassembler. + + When the user registers a new Python disassembler, the global C++ flag + is set to true, and now GDB will enter the Python environment to check + if there's a disassembler registered for the current architecture. 
*/ + +static PyObject * +disasmpy_set_enabled (PyObject *self, PyObject *args, PyObject *kw) +{ + PyObject *newstate; + static const char *keywords[] = { "state", nullptr }; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O", keywords, + &newstate)) + return nullptr; + + if (!PyBool_Check (newstate)) + { + PyErr_SetString (PyExc_TypeError, + _("The value passed to `_set_enabled' must be a boolean.")); + return nullptr; + } + + python_print_insn_enabled = PyObject_IsTrue (newstate); + Py_RETURN_NONE; +} + +/* Implement DisassembleInfo.read_memory(LENGTH, OFFSET). Read LENGTH + bytes at OFFSET from the start of the instruction currently being + disassembled, and return a memory buffer containing the bytes. + + OFFSET defaults to zero if it is not provided. LENGTH is required. If + the read fails then this will raise a gdb.MemoryError exception. */ + +static PyObject * +disasmpy_info_read_memory (PyObject *self, PyObject *args, PyObject *kw) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + + LONGEST length, offset = 0; + gdb::unique_xmalloc_ptr<gdb_byte> buffer; + static const char *keywords[] = { "length", "offset", nullptr }; + + if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "L|L", keywords, + &length, &offset)) + return nullptr; + + /* The apparent address from which we are reading memory. Note that in + some cases GDB actually disassembles instructions from a buffer, so + we might not actually be reading this information directly from the + inferior memory. This is all hidden behind the read_memory_func API + within the disassemble_info structure. */ + CORE_ADDR address = obj->address + offset; + + /* Setup a buffer to hold the result. */ + buffer.reset ((gdb_byte *) xmalloc (length)); + + /* Read content into BUFFER. If the read fails then raise a memory + error, otherwise, convert BUFFER to a Python memory buffer, and return + it to the user. 
*/ + disassemble_info *info = obj->gdb_info; + if (info->read_memory_func ((bfd_vma) address, buffer.get (), + (unsigned int) length, info) != 0) + { + disasmpy_set_memory_error_for_address (address); + return nullptr; + } + return gdbpy_buffer_to_membuf (std::move (buffer), address, length); +} + +/* Implement DisassembleInfo.address attribute, return the address at which + GDB would like an instruction disassembled. */ + +static PyObject * +disasmpy_info_address (PyObject *self, void *closure) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + return gdb_py_object_from_longest (obj->address).release (); +} + +/* Implement DisassembleInfo.architecture attribute. Return the + gdb.Architecture in which we are disassembling. */ + +static PyObject * +disasmpy_info_architecture (PyObject *self, void *closure) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + return gdbarch_to_arch_object (obj->gdbarch); +} + +/* Implement DisassembleInfo.progspace attribute. Return the + gdb.Progspace in which we are disassembling. */ + +static PyObject * +disasmpy_info_progspace (PyObject *self, void *closure) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + return pspace_to_pspace_object (obj->program_space).release (); +} + +/* This implements the disassemble_info read_memory_func callback and is + called from the libopcodes disassembler when the disassembler wants to + read memory. + + From the INFO argument we can find the gdbpy_disassembler object for + which we are disassembling, and from that object we can find the + DisassembleInfo for the current disassembly call. + + This function reads the instruction bytes by calling the read_memory + method on the DisassembleInfo object. This method might have been + overridden by user code. + + Read LEN bytes from MEMADDR and place them into BUFF. 
Return 0 on + success (in which case BUFF has been filled), or -1 on error, in which + case the contents of BUFF are undefined. */ + +int +gdbpy_disassembler::read_memory_func (bfd_vma memaddr, gdb_byte *buff, + unsigned int len, + struct disassemble_info *info) +{ + gdbpy_disassembler *dis + = static_cast<gdbpy_disassembler *> (info->application_data); + disasm_info_object *obj = dis->py_disasm_info (); + + /* The DisassembleInfo.read_memory method expects an offset from the + address stored within the DisassembleInfo object; calculate that + offset here. */ + LONGEST offset = (LONGEST) memaddr - (LONGEST) obj->address; + + /* Now call the DisassembleInfo.read_memory method. This might have been + overridden by the user. */ + gdbpy_ref<> result_obj (PyObject_CallMethod ((PyObject *) obj, + "read_memory", + "KL", len, offset)); + + /* Handle any exceptions. */ + if (result_obj == nullptr) + { + /* If we got a gdb.MemoryError then we ignore this and just report + that the read failed to the caller. The caller is then + responsible for calling the memory_error_func if it wants to. + Remember, the disassembler might just be probing to see if these + bytes can be read, if we automatically call the memory error + function, we can end up registering an error prematurely. */ + if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error)) + { + PyErr_Clear (); + return -1; + } + + /* For any other exception type we capture the value of the Python + exception and throw it, this will then be caught in + disasmpy_builtin_disassemble, at which point the exception will be + restored. */ + throw gdbpy_err_fetch (); + } + + /* Convert the result to a buffer. 
*/ + Py_buffer py_buff; + if (!PyObject_CheckBuffer (result_obj.get ()) + || PyObject_GetBuffer (result_obj.get(), &py_buff, PyBUF_CONTIG_RO) < 0) + { + PyErr_Format (PyExc_TypeError, + _("Result from read_memory is not a buffer")); + throw gdbpy_err_fetch (); + } + + /* Wrap PY_BUFF so that it is cleaned up correctly at the end of this + scope. */ + Py_buffer_up buffer_up (&py_buff); + + /* Validate that the buffer is the correct length. */ + if (py_buff.len != len) + { + PyErr_Format (PyExc_ValueError, + _("Buffer returned from read_memory is sized %d instead of the expected %d"), + py_buff.len, len); + throw gdbpy_err_fetch (); + } + + /* Copy the data out of the Python buffer and return success. */ + const gdb_byte *buffer = (const gdb_byte *) py_buff.buf; + memcpy (buff, buffer, len); + return 0; +} + +/* Implement DisassemblerResult.length attribute, return the length of the + disassembled instruction. */ + +static PyObject * +disasmpy_result_length (PyObject *self, void *closure) +{ + disasm_result_object *obj = (disasm_result_object *) self; + return gdb_py_object_from_longest (obj->length).release (); +} + +/* Implement DisassemblerResult.string attribute, return the content string + of the disassembled instruction. */ + +static PyObject * +disasmpy_result_string (PyObject *self, void *closure) +{ + disasm_result_object *obj = (disasm_result_object *) self; + + gdb_assert (obj->content != nullptr); + gdb_assert (strlen (obj->content->c_str ()) > 0); + gdb_assert (obj->length > 0); + return PyUnicode_Decode (obj->content->c_str (), + obj->content->size (), + host_charset (), nullptr); +} + +/* Implement DisassemblerResult.__init__. Takes two arguments, an + integer, the length in bytes of the disassembled instruction, and a + string, the disassembled content of the instruction. 
*/ + +static int +disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) +{ + static const char *keywords[] = { "length", "string", NULL }; + int length; + const char *string; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, + &length, &string)) + return -1; + + if (length <= 0) + { + PyErr_SetString (PyExc_ValueError, + _("Length must be greater than 0.")); + return -1; + } + + if (strlen (string) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String must not be empty.")); + return -1; + } + + disasm_result_object *obj = (disasm_result_object *) self; + disasmpy_init_disassembler_result (obj, length, std::string (string)); + + return 0; +} + +/* Implement memory_error_func callback for disassemble_info. Extract the + underlying DisassembleInfo Python object, and set a memory error on + it. */ + +void +gdbpy_disassembler::memory_error_func (int status, bfd_vma memaddr, + struct disassemble_info *info) +{ + gdbpy_disassembler *dis + = static_cast<gdbpy_disassembler *> (info->application_data); + dis->m_memory_error_address.emplace (memaddr); +} + +/* Wrapper of print_address. */ + +void +gdbpy_disassembler::print_address_func (bfd_vma addr, + struct disassemble_info *info) +{ + gdbpy_disassembler *dis + = static_cast<gdbpy_disassembler *> (info->application_data); + print_address (dis->arch (), addr, (struct ui_file *) info->stream); +} + +/* constructor. */ + +gdbpy_disassembler::gdbpy_disassembler (disasm_info_object *obj, + PyObject *memory_source) + : gdb_printing_disassembler (obj->gdbarch, &m_string_file, + read_memory_func, memory_error_func, + print_address_func), + m_disasm_info_object (obj), + m_memory_source (memory_source) +{ /* Nothing. */ } + +/* A wrapper around a reference to a Python DisassembleInfo object, which + ensures that the object is marked as invalid when we leave the enclosing + scope. + + Each DisassembleInfo is created in gdbpy_print_insn, and is done with by + the time that function returns. 
However, there's nothing to stop a user + caching a reference to the DisassembleInfo, and thus keeping the object + around. + + We therefore have the notion of a DisassembleInfo becoming invalid, this + happens when gdbpy_print_insn returns. This class is responsible for + marking the DisassembleInfo as invalid in its destructor. */ + +struct scoped_disasm_info_object +{ + /* Constructor. */ + scoped_disasm_info_object (struct gdbarch *gdbarch, CORE_ADDR memaddr, + disassemble_info *info) + : m_disasm_info (allocate_disasm_info_object ()) + { + disasm_info_fill (m_disasm_info.get (), gdbarch, current_program_space, + memaddr, info, nullptr); + } + + /* Upon destruction mark m_disasm_info as invalid. */ + ~scoped_disasm_info_object () + { + /* Invalidate the original DisassembleInfo object as well as any copies + that the user might have made. */ + for (disasm_info_object *obj = m_disasm_info.get (); + obj != nullptr; + obj = obj->next) + obj->gdb_info = nullptr; + } + + /* Return a pointer to the underlying disasm_info_object instance. */ + disasm_info_object * + get () const + { + return m_disasm_info.get (); + } + +private: + + /* Wrapper around the call to PyObject_New, this wrapper function can be + called from the constructor initialization list, while PyObject_New, a + macro, can't. */ + static disasm_info_object * + allocate_disasm_info_object () + { + return (disasm_info_object *) PyObject_New (disasm_info_object, + &disasm_info_object_type); + } + + /* A reference to a gdb.disassembler.DisassembleInfo object. When this + containing instance goes out of scope this reference is released, + however, the user might be holding other references to the + DisassembleInfo object in Python code, so the underlying object might + not be deleted. */ + gdbpy_ref<disasm_info_object> m_disasm_info; +}; + +/* See python-internal.h. */ + +gdb::optional<int> +gdbpy_print_insn (struct gdbarch *gdbarch, CORE_ADDR memaddr, + disassemble_info *info) +{ + /* Early exit case. 
This must be done as early as possible, and + definitely before we enter Python environment. The + python_print_insn_enabled flag is set (from Python) only when the user + has installed one (or more) Python disassemblers. So in the common + case (no custom disassembler installed) this flag will be false, + allowing for a quick return. */ + if (!gdb_python_initialized || !python_print_insn_enabled) + return {}; + + gdbpy_enter enter_py (get_current_arch (), current_language); + + /* Import the gdb.disassembler module. */ + gdbpy_ref<> gdb_python_disassembler_module + (PyImport_ImportModule ("gdb.disassembler")); + if (gdb_python_disassembler_module == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + /* Get the _print_insn attribute from the module, this should be the + function we are going to call to actually perform the disassembly. */ + gdbpy_ref<> hook + (PyObject_GetAttrString (gdb_python_disassembler_module.get (), + "_print_insn")); + if (hook == nullptr) + { + gdbpy_print_stack (); + return {}; + } + + /* Create the new DisassembleInfo object we will pass into Python. This + object will be marked as invalid when we leave this scope. */ + scoped_disasm_info_object scoped_disasm_info (gdbarch, memaddr, info); + disasm_info_object *disasm_info = scoped_disasm_info.get (); + + /* Call into the registered disassembler to (possibly) perform the + disassembly. */ + PyObject *insn_disas_obj = (PyObject *) disasm_info; + gdbpy_ref<> result (PyObject_CallFunctionObjArgs (hook.get (), + insn_disas_obj, + nullptr)); + + if (result == nullptr) + { + /* The call into Python code resulted in an exception. If this was a + gdb.MemoryError, then we can figure out an address and call the + disassemble_info::memory_error_func to report the error back to + core GDB. Any other exception type we report back to core GDB as + an unknown error (return -1 without first calling the + memory_error_func callback). 
*/ + + if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error)) + { + /* A gdb.MemoryError might have an address attribute which + contains the address at which the memory error occurred. If + this is the case then use this address, otherwise, fallback to + just using the address of the instruction we were asked to + disassemble. */ + gdbpy_err_fetch err; + PyErr_Clear (); + + CORE_ADDR addr; + if (err.value () != nullptr + && PyObject_HasAttrString (err.value ().get (), "address")) + { + /* PyObject_GetAttrString returns a new reference, hold it in + a gdbpy_ref so that it is released when we are done. */ + gdbpy_ref<> addr_obj + (PyObject_GetAttrString (err.value ().get (), "address")); + if (addr_obj == nullptr + || get_addr_from_python (addr_obj.get (), &addr) < 0) + { + /* Drop any Python error left behind by the failed lookup + or conversion, it must not leak back into core GDB. */ + PyErr_Clear (); + addr = disasm_info->address; + } + } + else + addr = disasm_info->address; + + info->memory_error_func (-1, addr, info); + return gdb::optional<int> (-1); + } + else if (PyErr_ExceptionMatches (gdbpy_gdberror_exc)) + { + gdbpy_err_fetch err; + gdb::unique_xmalloc_ptr<char> msg = err.to_string (); + + info->fprintf_func (info->stream, "%s", msg.get ()); + return gdb::optional<int> (-1); + } + else + { + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + } + else if (result == Py_None) + { + /* A return value of None indicates that the Python code could not, + or doesn't want to, disassemble this instruction. Just return an + empty result and core GDB will try to disassemble this for us. */ + return {}; + } + + /* Check the result is a DisassemblerResult (or a sub-class). */ + if (!PyObject_IsInstance (result.get (), + (PyObject *) &disasm_result_object_type)) + { + PyErr_SetString (PyExc_TypeError, + _("Result is not a DisassemblerResult.")); + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + /* The call into Python neither raised an exception, nor returned None. + Check to see if the result looks valid. 
*/ + gdbpy_ref<> length_obj (PyObject_GetAttrString (result.get (), "length")); + if (length_obj == nullptr) + { + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + gdbpy_ref<> string_obj (PyObject_GetAttrString (result.get (), "string")); + if (string_obj == nullptr) + { + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + if (!gdbpy_is_string (string_obj.get ())) + { + PyErr_SetString (PyExc_TypeError, _("String attribute is not a string.")); + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + gdb::unique_xmalloc_ptr<char> string + = gdbpy_obj_to_string (string_obj.get ()); + if (string == nullptr) + { + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + long length; + if (!gdb_py_int_as_long (length_obj.get (), &length)) + { + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + long max_insn_length = (gdbarch_max_insn_length_p (gdbarch) ? + gdbarch_max_insn_length (gdbarch) : INT_MAX); + if (length <= 0) + { + PyErr_SetString + (PyExc_ValueError, + _("Invalid length attribute: length must be greater than 0.")); + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + if (length > max_insn_length) + { + /* Both values are of type long, hence the %ld conversions. */ + PyErr_Format + (PyExc_ValueError, + _("Invalid length attribute: length %ld greater than architecture maximum of %ld"), + length, max_insn_length); + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + if (strlen (string.get ()) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String attribute must not be empty.")); + gdbpy_print_stack (); + return gdb::optional<int> (-1); + } + + /* Print the disassembled instruction back to core GDB, and return the + length of the disassembled instruction. */ + info->fprintf_func (info->stream, "%s", string.get ()); + return gdb::optional<int> (length); +} + +/* The tp_dealloc callback for the DisassemblerResult type. Takes care of + deallocating the content buffer. 
*/ + +static void +disasmpy_dealloc_result (PyObject *self) +{ + disasm_result_object *obj = (disasm_result_object *) self; + delete obj->content; + Py_TYPE (self)->tp_free (self); +} + +/* The get/set attributes of the gdb.disassembler.DisassembleInfo type. */ + +static gdb_PyGetSetDef disasm_info_object_getset[] = { + { "address", disasmpy_info_address, nullptr, + "Start address of the instruction to disassemble.", nullptr }, + { "architecture", disasmpy_info_architecture, nullptr, + "Architecture to disassemble in", nullptr }, + { "progspace", disasmpy_info_progspace, nullptr, + "Program space to disassemble in", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* The methods of the gdb.disassembler.DisassembleInfo type. */ + +static PyMethodDef disasm_info_object_methods[] = { + { "read_memory", (PyCFunction) disasmpy_info_read_memory, + METH_VARARGS | METH_KEYWORDS, + "read_memory (LEN, OFFSET = 0) -> Octets[]\n\ +Read LEN octets for the instruction to disassemble." }, + { "is_valid", disasmpy_info_is_valid, METH_NOARGS, + "is_valid () -> Boolean.\n\ +Return true if this DisassembleInfo is valid, false if not." }, + {nullptr} /* Sentinel */ +}; + +/* The get/set attributes of the gdb.disassembler.DisassemblerResult type. */ + +static gdb_PyGetSetDef disasm_result_object_getset[] = { + { "length", disasmpy_result_length, nullptr, + "Length of the disassembled instruction.", nullptr }, + { "string", disasmpy_result_string, nullptr, + "String representing the disassembled instruction.", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* These are the methods we add into the _gdb.disassembler module, which + are then imported into the gdb.disassembler module. These are global + functions that support performing disassembly. 
*/ + +PyMethodDef python_disassembler_methods[] = +{ + { "builtin_disassemble", (PyCFunction) disasmpy_builtin_disassemble, + METH_VARARGS | METH_KEYWORDS, + "builtin_disassemble (INFO, MEMORY_SOURCE = None) -> DisassemblerResult\n\ +Disassemble using GDB's builtin disassembler. INFO is an instance of\n\ +gdb.disassembler.DisassembleInfo. The MEMORY_SOURCE, if not None, should\n\ +be an object with the read_memory method." }, + { "_set_enabled", (PyCFunction) disasmpy_set_enabled, + METH_VARARGS | METH_KEYWORDS, + "_set_enabled (STATE) -> None\n\ +Set whether GDB should call into the Python _print_insn code or not." }, + {nullptr, nullptr, 0, nullptr} +}; + +/* Structure to define the _gdb.disassembler module. */ + +static struct PyModuleDef python_disassembler_module_def = +{ + PyModuleDef_HEAD_INIT, + "_gdb.disassembler", + nullptr, + -1, + python_disassembler_methods, + nullptr, + nullptr, + nullptr, + nullptr +}; + +/* Called to initialize the Python structures in this file. Return 0 on + success, or -1 if any step failed. */ + +int +gdbpy_initialize_disasm () +{ + /* Create the _gdb.disassembler module, and add it to the _gdb module. */ + + PyObject *gdb_disassembler_module; + gdb_disassembler_module = PyModule_Create (&python_disassembler_module_def); + if (gdb_disassembler_module == nullptr) + return -1; + if (PyModule_AddObject (gdb_module, "disassembler", + gdb_disassembler_module) < 0) + { + /* PyModule_AddObject only steals the reference on success, so on + failure we must release the new module ourselves. */ + Py_DECREF (gdb_disassembler_module); + return -1; + } + + /* This is needed so that 'import _gdb.disassembler' will work. 
*/ + PyObject *dict = PyImport_GetModuleDict (); + if (PyDict_SetItemString (dict, "_gdb.disassembler", + gdb_disassembler_module) < 0) + return -1; + + disasm_info_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_info_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassembleInfo", + (PyObject *) &disasm_info_object_type) < 0) + return -1; + + disasm_result_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_result_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerResult", + (PyObject *) &disasm_result_object_type) < 0) + return -1; + + return 0; +} + +/* Describe the gdb.disassembler.DisassembleInfo type. */ + +PyTypeObject disasm_info_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassembleInfo", /*tp_name*/ + sizeof (disasm_info_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + disasm_info_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "GDB instruction disassembler object", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + disasm_info_object_methods, /* tp_methods */ + 0, /* tp_members */ + disasm_info_object_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + disasm_info_init, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* Describe the gdb.disassembler.DisassemblerResult type. 
*/ + +PyTypeObject disasm_result_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerResult", /*tp_name*/ + sizeof (disasm_result_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + disasmpy_dealloc_result, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "GDB object, representing a disassembler result", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + disasm_result_object_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + disasmpy_result_init, /* tp_init */ + 0, /* tp_alloc */ +}; diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h index da2e791..5ff9989 100644 --- a/gdb/python/python-internal.h +++ b/gdb/python/python-internal.h @@ -540,6 +540,8 @@ int gdbpy_initialize_connection () int gdbpy_initialize_micommands (void) CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION; void gdbpy_finalize_micommands (); +int gdbpy_initialize_disasm () + CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION; /* A wrapper for PyErr_Fetch that handles reference counting for the caller. */ @@ -587,6 +589,13 @@ public: return PyErr_GivenExceptionMatches (m_error_type.get (), type); } + /* Return a new reference to the exception value object. 
*/ + + gdbpy_ref<> value () + { + return m_error_value; + } + private: gdbpy_ref<> m_error_type, m_error_value, m_error_traceback; @@ -840,4 +849,18 @@ extern bool gdbpy_is_progspace (PyObject *obj); extern gdb::unique_xmalloc_ptr<char> gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc); +/* Implement the 'print_insn' hook for Python. Disassemble an instruction + whose address is ADDRESS for architecture GDBARCH. The bytes of the + instruction should be read with INFO->read_memory_func as the + instruction being disassembled might actually be in a buffer. + + Used INFO->fprintf_func to print the results of the disassembly, and + return the length of the instruction in octets. + + If no instruction can be disassembled then return an empty value. */ + +extern gdb::optional<int> gdbpy_print_insn (struct gdbarch *gdbarch, + CORE_ADDR address, + disassemble_info *info); + #endif /* PYTHON_PYTHON_INTERNAL_H */ diff --git a/gdb/python/python.c b/gdb/python/python.c index 97de5f5..079c260 100644 --- a/gdb/python/python.c +++ b/gdb/python/python.c @@ -167,7 +167,7 @@ static const struct extension_language_ops python_extension_ops = gdbpy_colorize_disasm, - NULL, /* gdbpy_print_insn, */ + gdbpy_print_insn, }; #endif /* HAVE_PYTHON */ @@ -2053,6 +2053,7 @@ do_start_initialization () if (gdbpy_initialize_auto_load () < 0 || gdbpy_initialize_values () < 0 + || gdbpy_initialize_disasm () < 0 || gdbpy_initialize_frames () < 0 || gdbpy_initialize_commands () < 0 || gdbpy_initialize_instruction () < 0 diff --git a/gdb/testsuite/gdb.python/py-disasm.c b/gdb/testsuite/gdb.python/py-disasm.c new file mode 100644 index 0000000..ee0bb15 --- /dev/null +++ b/gdb/testsuite/gdb.python/py-disasm.c @@ -0,0 +1,25 @@ +/* This test program is part of GDB, the GNU debugger. + + Copyright 2021-2022 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +int +main () +{ + asm ("nop"); + asm ("nop"); /* Break here. */ + asm ("nop"); + return 0; +} diff --git a/gdb/testsuite/gdb.python/py-disasm.exp b/gdb/testsuite/gdb.python/py-disasm.exp new file mode 100644 index 0000000..1b9cd44 --- /dev/null +++ b/gdb/testsuite/gdb.python/py-disasm.exp @@ -0,0 +1,209 @@ +# Copyright (C) 2021-2022 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This file is part of the GDB testsuite. It validates the Python +# disassembler API. + +load_lib gdb-python.exp + +standard_testfile + +if { [prepare_for_testing "failed to prepare" ${testfile} ${srcfile} "debug"] } { + return -1 +} + +# Skip all tests if Python scripting is not enabled. 
+if { [skip_python_tests] } { continue } + +if ![runto_main] then { + fail "can't run to main" + return 0 +} + +set pyfile [gdb_remote_download host ${srcdir}/${subdir}/${testfile}.py] + +gdb_test "source ${pyfile}" "Python script imported" \ + "import python scripts" + +gdb_breakpoint [gdb_get_line_number "Break here."] +gdb_continue_to_breakpoint "Break here." + +set curr_pc [get_valueof "/x" "\$pc" "*unknown*"] + +gdb_test_no_output "python current_pc = ${curr_pc}" + +# The current pc will be something like 0x1234 with no leading zeros. +# However, in the disassembler output addresses are padded with zeros. +# This substitution changes 0x1234 to 0x0*1234, which can then be used +# as a regexp in the disassembler output matching. +set curr_pc_pattern [string replace ${curr_pc} 0 1 "0x0*"] + +# Grab the name of the current architecture, this is used in the tests +# patterns below. +set curr_arch [get_python_valueof "gdb.selected_inferior().architecture().name()" "*unknown*"] + +# Helper proc that removes all registered disassemblers. +proc py_remove_all_disassemblers {} { + gdb_test_no_output "python remove_all_python_disassemblers()" +} + +# A list of test plans. Each plan is a list of two elements, the +# first element is the name of a class in py-disasm.py, this is a +# disassembler class. The second element is a pattern that should be +# matched in the disassembler output. +# +# Each different disassembler tests some different feature of the +# Python disassembler API. 
+set unknown_error_pattern "unknown disassembler error \\(error = -1\\)" +set addr_pattern "\r\n=> ${curr_pc_pattern} <\[^>\]+>:\\s+" +set base_pattern "${addr_pattern}nop" +set test_plans \ + [list \ + [list "" "${base_pattern}\r\n.*"] \ + [list "GlobalNullDisassembler" "${base_pattern}\r\n.*"] \ + [list "GlobalPreInfoDisassembler" "${base_pattern}\\s+## ad = $hex, ar = ${curr_arch}\r\n.*"] \ + [list "GlobalPostInfoDisassembler" "${base_pattern}\\s+## ad = $hex, ar = ${curr_arch}\r\n.*"] \ + [list "GlobalReadDisassembler" "${base_pattern}\\s+## bytes =( $hex)+\r\n.*"] \ + [list "GlobalAddrDisassembler" "${base_pattern}\\s+## addr = ${curr_pc_pattern} <\[^>\]+>\r\n.*"] \ + [list "GdbErrorEarlyDisassembler" "${addr_pattern}GdbError instead of a result\r\n${unknown_error_pattern}"] \ + [list "RuntimeErrorEarlyDisassembler" "${addr_pattern}Python Exception <class 'RuntimeError'>: RuntimeError instead of a result\r\n\r\n${unknown_error_pattern}"] \ + [list "GdbErrorLateDisassembler" "${addr_pattern}GdbError after builtin disassembler\r\n${unknown_error_pattern}"] \ + [list "RuntimeErrorLateDisassembler" "${addr_pattern}Python Exception <class 'RuntimeError'>: RuntimeError after builtin disassembler\r\n\r\n${unknown_error_pattern}"] \ + [list "MemoryErrorEarlyDisassembler" "${base_pattern}\\s+## AFTER ERROR\r\n.*"] \ + [list "MemoryErrorLateDisassembler" "${addr_pattern}Cannot access memory at address ${curr_pc_pattern}"] \ + [list "RethrowMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address $hex"] \ + [list "ReadMemoryMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address ${curr_pc_pattern}"] \ + [list "ReadMemoryGdbErrorDisassembler" "${addr_pattern}read_memory raised GdbError\r\n${unknown_error_pattern}"] \ + [list "ReadMemoryRuntimeErrorDisassembler" "${addr_pattern}Python Exception <class 'RuntimeError'>: read_memory raised RuntimeError\r\n\r\n${unknown_error_pattern}"] \ + [list "ReadMemoryCaughtMemoryErrorDisassembler" 
"${addr_pattern}nop\r\n.*"] \ + [list "ReadMemoryCaughtGdbErrorDisassembler" "${addr_pattern}nop\r\n.*"] \ + [list "ReadMemoryCaughtRuntimeErrorDisassembler" "${addr_pattern}nop\r\n.*"] \ + [list "MemorySourceNotABufferDisassembler" "${addr_pattern}Python Exception <class 'TypeError'>: Result from read_memory is not a buffer\r\n\r\n${unknown_error_pattern}"] \ + [list "MemorySourceBufferTooLongDisassembler" "${addr_pattern}Python Exception <class 'ValueError'>: Buffer returned from read_memory is sized $decimal instead of the expected $decimal\r\n\r\n${unknown_error_pattern}"] \ + [list "ResultOfWrongType" "${addr_pattern}Python Exception <class 'TypeError'>: Result is not a DisassemblerResult.\r\n.*"] \ + [list "ResultWithInvalidLength" "${addr_pattern}Python Exception <class 'ValueError'>: Invalid length attribute: length must be greater than 0.\r\n.*"] \ + [list "ResultWithInvalidString" "${addr_pattern}Python Exception <class 'ValueError'>: String attribute must not be empty.\r\n.*"]] + +# Now execute each test plan. +foreach plan $test_plans { + set global_disassembler_name [lindex $plan 0] + set expected_pattern [lindex $plan 1] + + with_test_prefix "global_disassembler=${global_disassembler_name}" { + # Remove all existing disassemblers. + py_remove_all_disassemblers + + # If we have a disassembler to load, do it now. + if { $global_disassembler_name != "" } { + gdb_test_no_output "python add_global_disassembler($global_disassembler_name)" + } + + # Disassemble main, and check the disassembler output. + gdb_test "disassemble main" $expected_pattern + } +} + +# Check some errors relating to DisassemblerResult creation. +with_test_prefix "DisassemblerResult errors" { + gdb_test "python gdb.disassembler.DisassemblerResult(0, 'abc')" \ + [multi_line \ + "ValueError: Length must be greater than 0." 
\ + "Error while executing Python code."] + gdb_test "python gdb.disassembler.DisassemblerResult(-1, 'abc')" \ + [multi_line \ + "ValueError: Length must be greater than 0." \ + "Error while executing Python code."] + gdb_test "python gdb.disassembler.DisassemblerResult(1, '')" \ + [multi_line \ + "ValueError: String must not be empty." \ + "Error while executing Python code."] +} + +# Check that the architecture specific disassemblers can override the +# global disassembler. +# +# First, register a global disassembler, and check it is in place. +with_test_prefix "GLOBAL tagging disassembler" { + py_remove_all_disassemblers + gdb_test_no_output "python gdb.disassembler.register_disassembler(TaggingDisassembler(\"GLOBAL\"), None)" + gdb_test "disassemble main" "${base_pattern}\\s+## tag = GLOBAL\r\n.*" +} + +# Now register an architecture specific disassembler, and check it +# overrides the global disassembler. +with_test_prefix "LOCAL tagging disassembler" { + gdb_test_no_output "python gdb.disassembler.register_disassembler(TaggingDisassembler(\"LOCAL\"), \"${curr_arch}\")" + gdb_test "disassemble main" "${base_pattern}\\s+## tag = LOCAL\r\n.*" +} + +# Now remove the architecture specific disassembler, and check that +# the global disassembler kicks back in. +with_test_prefix "GLOBAL tagging disassembler again" { + gdb_test_no_output "python gdb.disassembler.register_disassembler(None, \"${curr_arch}\")" + gdb_test "disassemble main" "${base_pattern}\\s+## tag = GLOBAL\r\n.*" +} + +# Check that a DisassembleInfo becomes invalid after the call into the +# disassembler. +with_test_prefix "DisassembleInfo becomes invalid" { + py_remove_all_disassemblers + gdb_test_no_output "python add_global_disassembler(GlobalCachingDisassembler)" + gdb_test "disassemble main" "${base_pattern}\\s+## CACHED\r\n.*" + gdb_test "python GlobalCachingDisassembler.check()" "PASS" +} + +# Test the memory source aspect of the builtin disassembler. 
+with_test_prefix "memory source api" { + py_remove_all_disassemblers + gdb_test_no_output "python analyzing_disassembler = add_global_disassembler(AnalyzingDisassembler)" + gdb_test "disassemble main" "${base_pattern}\r\n.*" + gdb_test "python analyzing_disassembler.find_replacement_candidate()" \ + "Replace from $hex to $hex with NOP" + gdb_test "disassemble main" "${base_pattern}\r\n.*" \ + "second disassembler pass" + gdb_test "python analyzing_disassembler.check()" \ + "PASS" +} + +# Test the 'maint info python-disassemblers' command. +with_test_prefix "maint info python-disassemblers" { + py_remove_all_disassemblers + gdb_test "maint info python-disassemblers" "No Python disassemblers registered\\." \ + "list disassemblers, none registered" + gdb_test_no_output "python disasm = add_global_disassembler(BuiltinDisassembler)" + gdb_test "maint info python-disassemblers" \ + [multi_line \ + "Architecture\\s+Disassember Name" \ + "GLOBAL\\s+BuiltinDisassembler\\s+\\(Matches current architecture\\)"] \ + "list disassemblers, single global disassembler" + gdb_test_no_output "python arch = gdb.selected_inferior().architecture().name()" + gdb_test_no_output "python gdb.disassembler.register_disassembler(disasm, arch)" + gdb_test "maint info python-disassemblers" \ + [multi_line \ + "Architecture\\s+Disassember Name" \ + "\[^\r\n\]+BuiltinDisassembler\\s+\\(Matches current architecture\\)" \ + "GLOBAL\\s+BuiltinDisassembler"] \ + "list disassemblers, multiple disassemblers registered" +} + +# Check the attempt to create a "new" DisassembleInfo object fails. +with_test_prefix "Bad DisassembleInfo creation" { + gdb_test_no_output "python my_info = InvalidDisassembleInfo()" + gdb_test "python print(my_info.is_valid())" "True" + gdb_test "python gdb.disassembler.builtin_disassemble(my_info)" \ + [multi_line \ + "RuntimeError: DisassembleInfo is no longer valid\\."
\ + "Error while executing Python code\\."] +} diff --git a/gdb/testsuite/gdb.python/py-disasm.py b/gdb/testsuite/gdb.python/py-disasm.py new file mode 100644 index 0000000..ff7ffdb --- /dev/null +++ b/gdb/testsuite/gdb.python/py-disasm.py @@ -0,0 +1,712 @@ +# Copyright (C) 2021-2022 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import gdb +import gdb.disassembler +import struct +import sys + +from gdb.disassembler import Disassembler, DisassemblerResult + +# A global, holds the program-counter address at which we should +# perform the extra disassembly that this script provides. +current_pc = None + + +# Remove all currently registered disassemblers. +def remove_all_python_disassemblers(): + for a in gdb.architecture_names(): + gdb.disassembler.register_disassembler(None, a) + gdb.disassembler.register_disassembler(None, None) + + +class TestDisassembler(Disassembler): + """A base class for disassemblers within this script to inherit from. 
+ Implements the __call__ method and ensures we only do any + disassembly wrapping for the global CURRENT_PC.""" + + def __init__(self): + global current_pc + + super().__init__("TestDisassembler") + self.__info = None + if current_pc == None: + raise gdb.GdbError("no current_pc set") + + def __call__(self, info): + global current_pc + + if info.address != current_pc: + return None + self.__info = info + return self.disassemble(info) + + def get_info(self): + return self.__info + + def disassemble(self, info): + raise NotImplementedError("override the disassemble method") + + +class GlobalPreInfoDisassembler(TestDisassembler): + """Check the attributes of DisassembleInfo before disassembly has occurred.""" + + def disassemble(self, info): + ad = info.address + ar = info.architecture + + if ad != current_pc: + raise gdb.GdbError("invalid address") + + if not isinstance(ar, gdb.Architecture): + raise gdb.GdbError("invalid architecture type") + + result = gdb.disassembler.builtin_disassemble(info) + + text = result.string + "\t## ad = 0x%x, ar = %s" % (ad, ar.name()) + return DisassemblerResult(result.length, text) + + +class GlobalPostInfoDisassembler(TestDisassembler): + """Check the attributes of DisassembleInfo after disassembly has occurred.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + + ad = info.address + ar = info.architecture + + if ad != current_pc: + raise gdb.GdbError("invalid address") + + if not isinstance(ar, gdb.Architecture): + raise gdb.GdbError("invalid architecture type") + + text = result.string + "\t## ad = 0x%x, ar = %s" % (ad, ar.name()) + return DisassemblerResult(result.length, text) + + +class GlobalReadDisassembler(TestDisassembler): + """Check the DisassembleInfo.read_memory method. 
Calls the builtin + disassembler, then reads all of the bytes of this instruction, and + adds them as a comment to the disassembler output.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + len = result.length + str = "" + for o in range(len): + if str != "": + str += " " + v = bytes(info.read_memory(1, o))[0] + if sys.version_info[0] < 3: + v = struct.unpack("<B", v) + str += "0x%02x" % v + text = result.string + "\t## bytes = %s" % str + return DisassemblerResult(result.length, text) + + +class GlobalAddrDisassembler(TestDisassembler): + """Check the gdb.format_address method.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + arch = info.architecture + addr = info.address + program_space = info.progspace + str = gdb.format_address(addr, program_space, arch) + text = result.string + "\t## addr = %s" % str + return DisassemblerResult(result.length, text) + + +class GdbErrorEarlyDisassembler(TestDisassembler): + """Raise a GdbError instead of performing any disassembly.""" + + def disassemble(self, info): + raise gdb.GdbError("GdbError instead of a result") + + +class RuntimeErrorEarlyDisassembler(TestDisassembler): + """Raise a RuntimeError instead of performing any disassembly.""" + + def disassemble(self, info): + raise RuntimeError("RuntimeError instead of a result") + + +class GdbErrorLateDisassembler(TestDisassembler): + """Raise a GdbError after calling the builtin disassembler.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + raise gdb.GdbError("GdbError after builtin disassembler") + + +class RuntimeErrorLateDisassembler(TestDisassembler): + """Raise a RuntimeError after calling the builtin disassembler.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + raise RuntimeError("RuntimeError after builtin disassembler") + + +class MemoryErrorEarlyDisassembler(TestDisassembler): + """Throw a memory 
error, ignore the error and disassemble.""" + + def disassemble(self, info): + tag = "## FAIL" + try: + info.read_memory(1, -info.address + 2) + except gdb.MemoryError: + tag = "## AFTER ERROR" + result = gdb.disassembler.builtin_disassemble(info) + text = result.string + "\t" + tag + return DisassemblerResult(result.length, text) + + +class MemoryErrorLateDisassembler(TestDisassembler): + """Throw a memory error after calling the builtin disassembler, but + before we return a result.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + # The following read will throw an error. + info.read_memory(1, -info.address + 2) + return DisassemblerResult(1, "BAD") + + +class RethrowMemoryErrorDisassembler(TestDisassembler): + """Catch and rethrow a memory error.""" + + def disassemble(self, info): + try: + info.read_memory(1, -info.address + 2) + except gdb.MemoryError as e: + raise gdb.MemoryError("cannot read code at address 0x2") + return DisassemblerResult(1, "BAD") + + +class ResultOfWrongType(TestDisassembler): + """Return something that is not a DisassemblerResult from disassemble method""" + + class Blah: + def __init__(self, length, string): + self.length = length + self.string = string + + def disassemble(self, info): + return self.Blah(1, "ABC") + + +class ResultWrapper(gdb.disassembler.DisassemblerResult): + def __init__(self, length, string, length_x=None, string_x=None): + super().__init__(length, string) + if length_x is None: + self.__length = length + else: + self.__length = length_x + if string_x is None: + self.__string = string + else: + self.__string = string_x + + @property + def length(self): + return self.__length + + @property + def string(self): + return self.__string + + +class ResultWithInvalidLength(TestDisassembler): + """Return a result object with an invalid length.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + return ResultWrapper(result.length, 
result.string, 0) + + +class ResultWithInvalidString(TestDisassembler): + """Return a result object with an empty string.""" + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + return ResultWrapper(result.length, result.string, None, "") + + +class TaggingDisassembler(TestDisassembler): + """A simple disassembler that just tags the output.""" + + def __init__(self, tag): + super().__init__() + self._tag = tag + + def disassemble(self, info): + result = gdb.disassembler.builtin_disassemble(info) + text = result.string + "\t## tag = %s" % self._tag + return DisassemblerResult(result.length, text) + + +class GlobalCachingDisassembler(TestDisassembler): + """A disassembler that caches the DisassembleInfo that is passed in, + as well as a copy of the original DisassembleInfo. + + Once the call into the disassembler is complete then the + DisassembleInfo objects become invalid, and any calls into them + should trigger an exception.""" + + # This is where we cache the DisassembleInfo objects. 
+ cached_insn_disas = [] + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def disassemble(self, info): + """Disassemble the instruction, add a CACHED comment to the output, + and cache the DisassembleInfo so that it is not garbage collected.""" + GlobalCachingDisassembler.cached_insn_disas.append(info) + GlobalCachingDisassembler.cached_insn_disas.append(self.MyInfo(info)) + result = gdb.disassembler.builtin_disassemble(info) + text = result.string + "\t## CACHED" + return DisassemblerResult(result.length, text) + + @staticmethod + def check(): + """Check that all of the methods on the cached DisassembleInfo trigger an + exception.""" + for info in GlobalCachingDisassembler.cached_insn_disas: + assert isinstance(info, gdb.disassembler.DisassembleInfo) + assert not info.is_valid() + try: + val = info.address + raise gdb.GdbError("DisassembleInfo.address is still valid") + except RuntimeError as e: + assert str(e) == "DisassembleInfo is no longer valid." + except: + raise gdb.GdbError( + "DisassembleInfo.address raised an unexpected exception" + ) + + try: + val = info.architecture + raise gdb.GdbError("DisassembleInfo.architecture is still valid") + except RuntimeError as e: + assert str(e) == "DisassembleInfo is no longer valid." + except: + raise gdb.GdbError( + "DisassembleInfo.architecture raised an unexpected exception" + ) + + try: + val = info.read_memory(1, 0) + raise gdb.GdbError("DisassembleInfo.read is still valid") + except RuntimeError as e: + assert str(e) == "DisassembleInfo is no longer valid." 
+ except: + raise gdb.GdbError( + "DisassembleInfo.read raised an unexpected exception" + ) + + print("PASS") + + +class GlobalNullDisassembler(TestDisassembler): + """A disassembler that does not change the output at all.""" + + def disassemble(self, info): + pass + + +class ReadMemoryMemoryErrorDisassembler(TestDisassembler): + """Raise a MemoryError exception from the DisassembleInfo.read_memory + method.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + # Throw a memory error with a specific address. We don't + # expect this address to show up in the output though. + raise gdb.MemoryError(0x1234) + + def disassemble(self, info): + info = self.MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + + +class ReadMemoryGdbErrorDisassembler(TestDisassembler): + """Raise a GdbError exception from the DisassembleInfo.read_memory + method.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + raise gdb.GdbError("read_memory raised GdbError") + + def disassemble(self, info): + info = self.MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + + +class ReadMemoryRuntimeErrorDisassembler(TestDisassembler): + """Raise a RuntimeError exception from the DisassembleInfo.read_memory + method.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + raise RuntimeError("read_memory raised RuntimeError") + + def disassemble(self, info): + info = self.MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + + +class ReadMemoryCaughtMemoryErrorDisassembler(TestDisassembler): + """Raise a MemoryError exception from the DisassembleInfo.read_memory + method, catch this in the outer disassembler.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def 
__init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + raise gdb.MemoryError(0x1234) + + def disassemble(self, info): + info = self.MyInfo(info) + try: + return gdb.disassembler.builtin_disassemble(info) + except gdb.MemoryError: + return None + + +class ReadMemoryCaughtGdbErrorDisassembler(TestDisassembler): + """Raise a GdbError exception from the DisassembleInfo.read_memory + method, catch this in the outer disassembler.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + raise gdb.GdbError("exception message") + + def disassemble(self, info): + info = self.MyInfo(info) + try: + return gdb.disassembler.builtin_disassemble(info) + except gdb.GdbError as e: + if e.args[0] == "exception message": + return None + raise e + + +class ReadMemoryCaughtRuntimeErrorDisassembler(TestDisassembler): + """Raise a RuntimeError exception from the DisassembleInfo.read_memory + method, catch this in the outer disassembler.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + raise RuntimeError("exception message") + + def disassemble(self, info): + info = self.MyInfo(info) + try: + return gdb.disassembler.builtin_disassemble(info) + except RuntimeError as e: + if e.args[0] == "exception message": + return None + raise e + + +class MemorySourceNotABufferDisassembler(TestDisassembler): + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + super().__init__(info) + + def read_memory(self, length, offset): + return 1234 + + def disassemble(self, info): + info = self.MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + + +class MemorySourceBufferTooLongDisassembler(TestDisassembler): + """The read memory returns too many bytes.""" + + class MyInfo(gdb.disassembler.DisassembleInfo): + def __init__(self, info): + 
super().__init__(info) + + def read_memory(self, length, offset): + buffer = super().read_memory(length, offset) + # Create a new memory view made by duplicating BUFFER. This + # will trigger an error as GDB expects a buffer of exactly + # LENGTH to be returned, while this will return a buffer of + # 2*LENGTH. + return memoryview( + bytes([int.from_bytes(x, "little") for x in (list(buffer[0:]) * 2)]) + ) + + def disassemble(self, info): + info = self.MyInfo(info) + return gdb.disassembler.builtin_disassemble(info) + + +class BuiltinDisassembler(Disassembler): + """Just calls the builtin disassembler.""" + + def __init__(self): + super().__init__("BuiltinDisassembler") + + def __call__(self, info): + return gdb.disassembler.builtin_disassemble(info) + + +class AnalyzingDisassembler(Disassembler): + class MyInfo(gdb.disassembler.DisassembleInfo): + """Wrapper around builtin DisassembleInfo type that overrides the + read_memory method.""" + + def __init__(self, info, start, end, nop_bytes): + """INFO is the DisassembleInfo we are wrapping. START and END are + addresses, and NOP_BYTES should be a memoryview object. + + The length (END - START) should be the same as the length + of NOP_BYTES. + + Any memory read requests outside the START->END range are + serviced normally, but any attempt to read within the + START->END range will return content from NOP_BYTES.""" + super().__init__(info) + self._start = start + self._end = end + self._nop_bytes = nop_bytes + + def _read_replacement(self, length, offset): + """Return a slice of the buffer representing the replacement nop + instructions.""" + + assert self._nop_bytes is not None + rb = self._nop_bytes + + # If this request is outside of a nop instruction then we don't know + # what to do, so just raise a memory error. + if offset >= len(rb) or (offset + length) > len(rb): + raise gdb.MemoryError("invalid length and offset combination") + + # Return only the slice of the nop instruction as requested. 
+ s = offset + e = offset + length + return rb[s:e] + + def read_memory(self, length, offset=0): + """Callback used by the builtin disassembler to read the contents of + memory.""" + + # If this request is within the region we are replacing with 'nop' + # instructions, then call the helper function to perform that + # replacement. + if self._start is not None: + assert self._end is not None + if self.address >= self._start and self.address < self._end: + return self._read_replacement(length, offset) + + # Otherwise, we just forward this request to the default read memory + # implementation. + return super().read_memory(length, offset) + + def __init__(self): + """Constructor.""" + super().__init__("AnalyzingDisassembler") + + # Details about the instructions found during the first disassembler + # pass. + self._pass_1_length = [] + self._pass_1_insn = [] + self._pass_1_address = [] + + # The start and end address for the instruction we will replace with + # one or more 'nop' instructions during pass two. + self._start = None + self._end = None + + # The index in the _pass_1_* lists for where the nop instruction can + # be found, also, the buffer of bytes that make up a nop instruction. + self._nop_index = None + self._nop_bytes = None + + # A flag that indicates if we are in the first or second pass of + # this disassembler test. + self._first_pass = True + + # The disassembled instructions collected during the second pass. + self._pass_2_insn = [] + + # A copy of _pass_1_insn that has been modified to include the extra + # 'nop' instructions we plan to insert during the second pass. This + # is then checked against _pass_2_insn after the second disassembler + # pass has completed. + self._check = [] + + def __call__(self, info): + """Called to perform the disassembly.""" + + # Override the info object, this provides access to our + # read_memory function. 
+ info = self.MyInfo(info, self._start, self._end, self._nop_bytes) + result = gdb.disassembler.builtin_disassemble(info) + + # Record some information about the first 'nop' instruction we find. + if self._nop_index is None and result.string == "nop": + self._nop_index = len(self._pass_1_length) + # The offset in the following read_memory call defaults to 0. + print("APB: Reading nop bytes") + self._nop_bytes = info.read_memory(result.length) + + # Record information about each instruction that is disassembled. + # This test is performed in two passes, and we need different + # information in each pass. + if self._first_pass: + self._pass_1_length.append(result.length) + self._pass_1_insn.append(result.string) + self._pass_1_address.append(info.address) + else: + self._pass_2_insn.append(result.string) + + return result + + def find_replacement_candidate(self): + """Call this after the first disassembly pass. This identifies a suitable + instruction to replace with 'nop' instruction(s).""" + + if self._nop_index is None: + raise gdb.GdbError("no nop was found") + + nop_idx = self._nop_index + nop_length = self._pass_1_length[nop_idx] + + # First we look for an instruction that is larger than a nop + # instruction, but whose length is an exact multiple of the nop + # instruction's length. + replace_idx = None + for idx in range(len(self._pass_1_length)): + if ( + idx > 0 + and idx != nop_idx + and self._pass_1_insn[idx] != "nop" + and self._pass_1_length[idx] > self._pass_1_length[nop_idx] + and self._pass_1_length[idx] % self._pass_1_length[nop_idx] == 0 + ): + replace_idx = idx + break + + # If we still don't have a replacement candidate, then search again, + # this time looking for an instruction that is the same length as a + # nop instruction.
+ if replace_idx is None: + for idx in range(len(self._pass_1_length)): + if ( + idx > 0 + and idx != nop_idx + and self._pass_1_insn[idx] != "nop" + and self._pass_1_length[idx] == self._pass_1_length[nop_idx] + ): + replace_idx = idx + break + + # Weird, the nop instruction must be larger than every other + # instruction, or all instructions are 'nop'? + if replace_idx is None: + raise gdb.GdbError("can't find an instruction to replace") + + # Record the instruction range that will be replaced with 'nop' + # instructions, and mark that we are now on the second pass. + self._start = self._pass_1_address[replace_idx] + self._end = self._pass_1_address[replace_idx] + self._pass_1_length[replace_idx] + self._first_pass = False + print("Replace from 0x%x to 0x%x with NOP" % (self._start, self._end)) + + # Finally, build the expected result. Create the _check list, which + # is a copy of _pass_1_insn, but replace the instruction we + # identified above with a series of 'nop' instructions. + self._check = list(self._pass_1_insn) + nop_count = int(self._pass_1_length[replace_idx] / self._pass_1_length[nop_idx]) + nops = ["nop"] * nop_count + self._check[replace_idx : (replace_idx + 1)] = nops + + def check(self): + """Call this after the second disassembler pass to validate the output.""" + if self._check != self._pass_2_insn: + print("APB, Check : %s" % self._check) + print("APB, Result: %s" % self._pass_2_insn) + raise gdb.GdbError("mismatch") + print("PASS") + + +def add_global_disassembler(dis_class): + """Create an instance of DIS_CLASS and register it as a global disassembler.""" + dis = dis_class() + gdb.disassembler.register_disassembler(dis, None) + return dis + + +class InvalidDisassembleInfo(gdb.disassembler.DisassembleInfo): + """An attempt to create a DisassembleInfo sub-class without calling + the parent class init method. 
+ + Attempts to use instances of this class should throw an error + saying that the DisassembleInfo is not valid, despite this class + having all of the required attributes. + + The reason why this class will never be valid is that an internal + field (within the C++ code) can't be initialized without calling + the parent class init method.""" + + def __init__(self): + assert current_pc is not None + + def is_valid(self): + return True + + @property + def address(self): + global current_pc + return current_pc + + @property + def architecture(self): + return gdb.selected_inferior().architecture() + + @property + def progspace(self): + return gdb.selected_inferior().progspace + + +# Start with all disassemblers removed. +remove_all_python_disassemblers() + +print("Python script imported") |