diff options
-rw-r--r-- | gdb/NEWS | 19 | ||||
-rw-r--r-- | gdb/doc/python.texi | 311 | ||||
-rw-r--r-- | gdb/python/py-disasm.c | 871 | ||||
-rw-r--r-- | gdb/testsuite/gdb.python/py-disasm.exp | 94 | ||||
-rw-r--r-- | gdb/testsuite/gdb.python/py-disasm.py | 164 |
5 files changed, 1366 insertions, 93 deletions
@@ -173,6 +173,25 @@ info main ** It is now no longer possible to sub-class the gdb.disassembler.DisassemblerResult type. + ** The Disassembler API from the gdb.disassembler module has been + extended to include styling support: + + - The DisassemblerResult class can now be initialized with a list + of parts. Each part represents part of the disassembled + instruction along with the associated style information. This + list of parts can be accessed with the new + DisassemblerResult.parts property. + + - New constants gdb.disassembler.STYLE_* representing all the + different styles part of an instruction might have. + + - New methods DisassembleInfo.text_part and + DisassembleInfo.address_part which are used to create the new + styled parts of a disassembled instruction. + + - Changes are backwards compatible, the older API can still be + used to disassemble instructions without styling. + *** Changes in GDB 13 * MI version 1 is deprecated, and will be removed in GDB 14. diff --git a/gdb/doc/python.texi b/gdb/doc/python.texi index a906c16..5d714ee 100644 --- a/gdb/doc/python.texi +++ b/gdb/doc/python.texi @@ -6870,6 +6870,7 @@ values can be 1 (left), 2 (middle), or 3 (right). using the Python API. The disassembler related features are contained within the @code{gdb.disassembler} module: +@anchor{DisassembleInfo Class} @deftp {class} gdb.disassembler.DisassembleInfo Disassembly is driven by instances of this class. Each time @value{GDBN} needs to disassemble an instruction, an instance of this @@ -6971,6 +6972,25 @@ not itself raise a @code{MemoryError}. Any other exception type raised in @code{read_memory} will propagate back and be re-raised by @code{builtin_disassemble}. @end defun + +@defun DisassembleInfo.text_part (style, string) +Create a new @code{DisassemblerTextPart} representing a piece of a +disassembled instruction. @var{string} should be a non-empty string, +and @var{style} should be an appropriate style constant +(@pxref{Disassembler Style Constants}). 
+ +Disassembler parts are used when creating a @code{DisassemblerResult} +in order to represent the styling within an instruction +(@pxref{DisassemblerResult Class}). +@end defun + +@defun DisassembleInfo.address_part (address) +Create a new @code{DisassemblerAddressPart}. @var{address} is the +value of the absolute address this part represents. A +@code{DisassemblerAddressPart} is displayed as an absolute address and +an associated symbol, the address and symbol are styled appropriately. +@end defun + @end deftp @anchor{Disassembler Class} @@ -7024,6 +7044,7 @@ the error stream according to the @kbd{set python print-stack} setting @end defun @end deftp +@anchor{DisassemblerResult Class} @deftp {class} gdb.disassembler.DisassemblerResult This class represents the result of disassembling a single instruction. An instance of this class will be returned from @@ -7037,11 +7058,30 @@ It is not possible to sub-class the @code{DisassemblerResult} class. The @code{DisassemblerResult} class has the following properties and methods: -@defun DisassemblerResult.__init__ (length, string) +@defun DisassemblerResult.__init__ (length, string, parts) Initialize an instance of this class, @var{length} is the length of the disassembled instruction in bytes, which must be greater than -zero, and @var{string} is a non-empty string that represents the -disassembled instruction. +zero. + +Only one of @var{string} or @var{parts} should be used to initialize a +new @code{DisassemblerResult}; the other one should be passed the +value @code{None}. Alternatively, the arguments can be passed by +name, and the unused argument can be ignored. + +The @var{string} argument, if not @code{None}, is a non-empty string +that represents the entire disassembled instruction. Building a result +object using the @var{string} argument does not allow for any styling +information to be included in the result. 
@value{GDBN} will style the +result as a single @code{DisassemblerTextPart} with @code{STYLE_TEXT} +style (@pxref{Disassembler Styling Parts}). + +The @var{parts} argument, if not @code{None}, is a non-empty sequence +of @code{DisassemblerPart} objects. Each part represents a small part +of the disassembled instruction along with associated styling +information. A result object built using @var{parts} can be displayed +by @value{GDBN} with full styling information +(@pxref{style_disassembler_enabled,,@kbd{set style disassembler +enabled}}). @end defun @defvar DisassemblerResult.length @@ -7051,10 +7091,273 @@ instruction in bytes, this will always be greater than zero. @defvar DisassemblerResult.string A read-only property containing a non-empty string representing the -disassembled instruction. +disassembled instruction. The @var{string} is a representation of the +disassembled instruction without any styling information. To see how +the instruction will be styled use the @var{parts} property. + +If this instance was initialized using separate +@code{DisassemblerPart} objects, the @var{string} property will still +be valid. The @var{string} value is created by concatenating the +@code{DisassemblerPart.string} values of each component part +(@pxref{Disassembler Styling Parts}). +@end defvar + +@defvar DisassemblerResult.parts +A read-only property containing a non-empty sequence of +@code{DisassemblerPart} objects. Each @code{DisassemblerPart} object +contains a small part of the instruction along with information about +how that part should be styled. @value{GDBN} uses this information to +create styled disassembler output +(@pxref{style_disassembler_enabled,,@kbd{set style disassembler +enabled}}). + +If this instance was initialized using a single string rather than +with a sequence of @code{DisassemblerPart} objects, the @var{parts} +property will still be valid. 
In this case the @var{parts} property +will hold a sequence containing a single @code{DisassemblerTextPart} +object, the string of which will represent the entire instruction, and +the style of which will be @code{STYLE_TEXT}. +@end defvar +@end deftp + +@anchor{Disassembler Styling Parts} +@deftp {class} gdb.disassembler.DisassemblerPart +This is a parent class from which the different part sub-classes +inherit. Only instances of the sub-classes detailed below will be +returned by the Python API. + +It is not possible to directly create instances of either this parent +class, or any of the sub-classes listed below. Instances of the +sub-classes listed below are created by calling +@code{builtin_disassemble} (@pxref{builtin_disassemble}) and are +returned within the @code{DisassemblerResult} object, or can be +created by calling the @code{text_part} and @code{address_part} +methods on the @code{DisassembleInfo} class (@pxref{DisassembleInfo +Class}). + +The @code{DisassemblerPart} class has a single property: + +@defvar DisassemblerPart.string +A read-only property that contains a non-empty string representing +this part of the disassembled instruction. The string within this +property doesn't include any styling information. @end defvar @end deftp +@deftp {class} gdb.disassembler.DisassemblerTextPart +The @code{DisassemblerTextPart} class represents a piece of the +disassembled instruction and the associated style for that piece. +Instances of this class can't be created directly, instead call +@code{DisassembleInfo.text_part} to create a new instance of this +class (@pxref{DisassembleInfo Class}). + +As well as the properties of its parent class, the +@code{DisassemblerTextPart} has the following additional property: + +@defvar DisassemblerTextPart.style +A read-only property that contains one of the defined style constants. +@value{GDBN} will use this style when styling this part of the +disassembled instruction (@pxref{Disassembler Style Constants}). 
+@end defvar +@end deftp + +@deftp {class} gdb.disassembler.DisassemblerAddressPart +The @code{DisassemblerAddressPart} class represents an absolute +address within a disassembled instruction. Using a +@code{DisassemblerAddressPart} instead of a +@code{DisassemblerTextPart} with @code{STYLE_ADDRESS} is preferred, +@value{GDBN} will display the address as both an absolute address, and +will look up a suitable symbol to display next to the address. Using +@code{DisassemblerAddressPart} also ensures that user settings such as +@code{set print max-symbolic-offset} are respected. + +Here is an example of an x86-64 instruction: + +@smallexample +call 0x401136 <foo> +@end smallexample + +@noindent +In this instruction the @code{0x401136 <foo>} was generated from a +single @code{DisassemblerAddressPart}. The @code{0x401136} will be +styled with @code{STYLE_ADDRESS}, and @code{foo} will be styled with +@code{STYLE_SYMBOL}. The @code{<} and @code{>} will be styled as +@code{STYLE_TEXT}. + +If the inclusion of the symbol name is not required then a +@code{DisassemblerTextPart} with style @code{STYLE_ADDRESS} can be +used instead. + +Instances of this class can't be created directly, instead call +@code{DisassembleInfo.address_part} to create a new instance of this +class (@pxref{DisassembleInfo Class}). + +As well as the properties of its parent class, the +@code{DisassemblerAddressPart} has the following additional property: + +@defvar DisassemblerAddressPart.address +A read-only property that contains the @var{address} passed to this +object's @code{__init__} method. +@end defvar +@end deftp + +@anchor{Disassembler Style Constants} + +The following table lists all of the disassembler styles that are +available. @value{GDBN} maps these style constants onto its style +settings (@pxref{Output Styling}). In some cases, several style +constants produce the same style settings, and thus will produce the +same visual effect on the screen. 
This could change in future +releases of @value{GDBN}, so care should be taken to select the +correct style constant to ensure correct output styling in future +releases of @value{GDBN}. + +@vtable @code +@vindex STYLE_TEXT +@item gdb.disassembler.STYLE_TEXT +This is the default style used by @value{GDBN} when styling +disassembler output. This style should be used for any parts of the +instruction that don't fit any of the other styles listed below. +@value{GDBN} styles text with this style using its default style. + +@vindex STYLE_MNEMONIC +@item gdb.disassembler.STYLE_MNEMONIC +This style is used for styling the primary instruction mnemonic, which +usually appears at, or near, the start of the disassembled instruction +string. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_SUB_MNEMONIC +@item gdb.disassembler.STYLE_SUB_MNEMONIC +This style is used for styling any sub-mnemonics within a disassembled +instruction. A sub-mnemonic is any text within the instruction that +controls the function of the instruction, but which is disjoint from +the primary mnemonic (which will have style @code{STYLE_MNEMONIC}). + +As an example, consider this AArch64 instruction: + +@smallexample +add w16, w7, w1, lsl #1 +@end smallexample + +@noindent +The @code{add} is the primary instruction mnemonic, and would be given +style @code{STYLE_MNEMONIC}, while @code{lsl} is the sub-mnemonic, and +would be given the style @code{STYLE_SUB_MNEMONIC}. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_ASSEMBLER_DIRECTIVE +@item gdb.disassembler.STYLE_ASSEMBLER_DIRECTIVE +Sometimes a series of bytes doesn't decode to a valid instruction.
In +this case the disassembler may choose to represent the result of +disassembling using an assembler directive, for example: + +@smallexample +.word 0x1234 +@end smallexample + +@noindent +In this case, the @code{.word} would be given the +@code{STYLE_ASSEMBLER_DIRECTIVE} style. An assembler directive is +similar to a mnemonic in many ways but is something that is not part +of the architecture's instruction set. + +@value{GDBN} styles text with this style using the @code{disassembler +mnemonic} style setting. + +@vindex STYLE_REGISTER +@item gdb.disassembler.STYLE_REGISTER +This style is used for styling any text that represents a register +name, or register number, within a disassembled instruction. + +@value{GDBN} styles text with this style using the @code{disassembler +register} style setting. + +@vindex STYLE_ADDRESS +@item gdb.disassembler.STYLE_ADDRESS +This style is used for styling numerical values that represent +absolute addresses within the disassembled instruction. + +When creating a @code{DisassemblerTextPart} with this style, you +should consider if a @code{DisassemblerAddressPart} would be more +appropriate. See @ref{Disassembler Styling Parts} for a description +of what each part offers. + +@value{GDBN} styles text with this style using the @code{disassembler +address} style setting. + +@vindex STYLE_ADDRESS_OFFSET +@item gdb.disassembler.STYLE_ADDRESS_OFFSET +This style is used for styling numerical values that represent offsets +to addresses within the disassembled instruction. A value is +considered an address offset when the instruction itself is going to +access memory, and the value is being used to offset which address is +accessed. + +For example, an architecture might have an instruction that loads from +memory using an address within a register. If that instruction also +allowed for an immediate offset to be encoded into the instruction, +this would be an address offset.
Similarly, a branch instruction +might jump to an address in a register plus an address offset that is +encoded into the instruction. + +@value{GDBN} styles text with this style using the @code{disassembler +immediate} style setting. + +@vindex STYLE_IMMEDIATE +@item gdb.disassembler.STYLE_IMMEDIATE +Use @code{STYLE_IMMEDIATE} for any numerical values within a +disassembled instruction when those values are not addresses, address +offsets, or register numbers (The styles @code{STYLE_ADDRESS}, +@code{STYLE_ADDRESS_OFFSET}, or @code{STYLE_REGISTER} can be used in +those cases). + +@value{GDBN} styles text with this style using the @code{disassembler +immediate} style setting. + +@vindex STYLE_SYMBOL +@item gdb.disassembler.STYLE_SYMBOL +This style is used for styling the textual name of a symbol that is +included within a disassembled instruction. A symbol name is often +included next to an absolute address within a disassembled instruction +to make it easier for the user to understand what the address is +referring to. For example: + +@smallexample +call 0x401136 <foo> +@end smallexample + +@noindent +Here @code{foo} is the name of a symbol, and should be given the +@code{STYLE_SYMBOL} style. + +Adding symbols next to absolute addresses like this is handled +automatically by the @code{DisassemblerAddressPart} class +(@pxref{Disassembler Styling Parts}). + +@value{GDBN} styles text with this style using the @code{disassembler +symbol} style setting. + +@vindex STYLE_COMMENT_START +@item gdb.disassembler.STYLE_COMMENT_START +This style is used to start a line comment in the disassembly output. +Unlike other styles, which only apply to the single +@code{DisassemblerTextPart} to which they are applied, the comment +style is sticky, and overrides the style of any further pieces within +this instruction.
+ +This means that, after a @code{STYLE_COMMENT_START} piece has been +seen, @value{GDBN} will apply the comment style until the end of the +line, ignoring the specific style within a piece. + +@value{GDBN} styles text with this style using the @code{disassembler +comment} style setting. +@end vtable + The following functions are also contained in the @code{gdb.disassembler} module: diff --git a/gdb/python/py-disasm.c b/gdb/python/py-disasm.c index f246a09..85d936e 100644 --- a/gdb/python/py-disasm.c +++ b/gdb/python/py-disasm.c @@ -56,6 +56,49 @@ struct disasm_info_object extern PyTypeObject disasm_info_object_type CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_info_object"); +/* Implement gdb.disassembler.DisassembleAddressPart type. An object of + this type represents a small part of a disassembled instruction; a part + that is an address that should be printed using a call to GDB's + internal print_address function. */ + +struct disasm_addr_part_object +{ + PyObject_HEAD + + /* The address to be formatted. */ + bfd_vma address; + + /* A gdbarch. This is only needed in the case where the user asks for + the DisassemblerAddressPart to be converted to a string. When we + return this part to GDB within a DisassemblerResult then GDB will use + the gdbarch from the initial disassembly request. */ + struct gdbarch *gdbarch; +}; + +extern PyTypeObject disasm_addr_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_addr_part_object"); + +/* Implement gdb.disassembler.DisassembleTextPart type. An object of + this type represents a small part of a disassembled instruction; a part + that is a piece of test along with an associated style. */ + +struct disasm_text_part_object +{ + PyObject_HEAD + + /* The string that is this part. */ + std::string *string; + + /* The style to use when displaying this part. 
*/ + enum disassembler_style style; +}; + +extern PyTypeObject disasm_text_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_text_part_object"); + +extern PyTypeObject disasm_part_object_type + CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("PyObject"); + /* Implement gdb.disassembler.DisassemblerResult type, an object that holds the result of calling the disassembler. This is mostly the length of the disassembled instruction (in bytes), and the string representing the @@ -68,9 +111,9 @@ struct disasm_result_object /* The length of the disassembled instruction in bytes. */ int length; - /* A buffer which, when allocated, holds the disassembled content of an - instruction. */ - string_file *content; + /* A vector containing all the parts of the disassembled instruction. + Each part will be a DisassemblerPart sub-class. */ + std::vector<gdbpy_ref<>> *parts; }; extern PyTypeObject disasm_result_object_type @@ -88,7 +131,7 @@ static bool python_print_insn_enabled = false; placed in the application_data field of the disassemble_info that is used when we call gdbarch_print_insn. */ -struct gdbpy_disassembler : public gdb_printing_disassembler +struct gdbpy_disassembler : public gdb_disassemble_info { /* Constructor. */ gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source); @@ -109,6 +152,27 @@ struct gdbpy_disassembler : public gdb_printing_disassembler unsigned int len, struct disassemble_info *info) noexcept; + /* Callback used as the disassemble_info's fprintf_func callback. The + DIS_INFO pointer is a pointer to a gdbpy_disassembler object. */ + static int fprintf_func (void *dis_info, const char *format, ...) noexcept + ATTRIBUTE_PRINTF(2,3); + + /* Callback used as the disassemble_info's fprintf_styled_func callback. + The DIS_INFO pointer is a pointer to a gdbpy_disassembler. */ + static int fprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, ...) 
noexcept + ATTRIBUTE_PRINTF(3,4); + + /* Helper used by fprintf_func and fprintf_styled_func. This function + creates a new DisassemblerTextPart and adds it to the disassembler's + parts list. The actual disassembler is accessed through DIS_INFO, + which is a pointer to the gdbpy_disassembler object. */ + static int vfprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, va_list args) noexcept + ATTRIBUTE_PRINTF(3,0); + /* Return a reference to an optional that contains the address at which a memory error occurred. The optional will only have a value if a memory error actually occurred. */ @@ -118,9 +182,9 @@ struct gdbpy_disassembler : public gdb_printing_disassembler /* Return the content of the disassembler as a string. The contents are moved out of the disassembler, so after this call the disassembler contents have been reset back to empty. */ - std::string release () + std::vector<gdbpy_ref<>> release () { - return m_string_file.release (); + return std::move (m_parts); } /* If there is a Python exception stored in this disassembler then @@ -147,8 +211,10 @@ struct gdbpy_disassembler : public gdb_printing_disassembler private: - /* Where the disassembler result is written. */ - string_file m_string_file; + /* The list of all the parts that make up this disassembled instruction. + This is populated as a result of the callbacks from libopcodes as the + instruction is disassembled. */ + std::vector<gdbpy_ref<>> m_parts; /* The DisassembleInfo object we are disassembling for. */ disasm_info_object *m_disasm_info_object; @@ -286,6 +352,38 @@ disasmpy_set_memory_error_for_address (CORE_ADDR address) PyErr_SetObject (gdbpy_gdb_memory_error, address_obj); } +/* Create a new DisassemblerTextPart and return a gdbpy_ref wrapper for + the new object. STR is the string content of the part and STYLE is the + style to be used when GDB displays this part. 
*/ + +static gdbpy_ref<> +make_disasm_text_part (std::string &&str, enum disassembler_style style) +{ + PyTypeObject *type = &disasm_text_part_object_type; + disasm_text_part_object *text_part + = (disasm_text_part_object *) type->tp_alloc (type, 0); + text_part->string = new std::string (str); + text_part->style = style; + + return gdbpy_ref<> ((PyObject *) text_part); +} + +/* Create a new DisassemblerAddressPart and return a gdbpy_ref wrapper for + the new object. GDBARCH is the architecture used when formatting the + address, and ADDRESS is the numerical address to be displayed. */ + +static gdbpy_ref<> +make_disasm_addr_part (struct gdbarch *gdbarch, CORE_ADDR address) +{ + PyTypeObject *type = &disasm_addr_part_object_type; + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) type->tp_alloc (type, 0); + addr_part->address = address; + addr_part->gdbarch = gdbarch; + + return gdbpy_ref<> ((PyObject *) addr_part); +} + /* Ensure that a gdb.disassembler.DisassembleInfo is valid. */ #define DISASMPY_DISASM_INFO_REQUIRE_VALID(Info) \ @@ -298,21 +396,135 @@ disasmpy_set_memory_error_for_address (CORE_ADDR address) } \ } while (0) -/* Initialise OBJ, a DisassemblerResult object with LENGTH and CONTENT. +/* Implement DisassembleInfo.text_part method. Creates and returns a new + DisassemblerTextPart object. 
*/ + +static PyObject * +disasmpy_info_make_text_part (PyObject *self, PyObject *args, + PyObject *kwargs) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + + static const char *keywords[] = { "style", "string", NULL }; + int style_num; + const char *string; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, + &style_num, &string)) + return nullptr; + + if (style_num < 0 || style_num > ((int) dis_style_comment_start)) + { + PyErr_SetString (PyExc_ValueError, + _("Invalid disassembler style.")); + return nullptr; + } + + if (strlen (string) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String must not be empty.")); + return nullptr; + } + + gdbpy_ref<> text_part + = make_disasm_text_part (std::string (string), + (enum disassembler_style) style_num); + return text_part.release (); +} + +/* Implement DisassembleInfo.address_part method. Creates and returns a + new DisassemblerAddressPart object. */ + +static PyObject * +disasmpy_info_make_address_part (PyObject *self, PyObject *args, + PyObject *kwargs) +{ + disasm_info_object *obj = (disasm_info_object *) self; + DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); + + static const char *keywords[] = { "address", NULL }; + CORE_ADDR address; + PyObject *address_object; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O", keywords, + &address_object)) + return nullptr; + + if (get_addr_from_python (address_object, &address) < 0) + return nullptr; + + return make_disasm_addr_part (obj->gdbarch, address).release (); +} + +/* Return a string representation of TEXT_PART. The returned string does + not include any styling. */ + +static std::string +disasmpy_part_to_string (const disasm_text_part_object *text_part) +{ + gdb_assert (text_part->string != nullptr); + return *(text_part->string); +} + +/* Return a string representation of ADDR_PART. The returned string does + not include any styling. 
*/ + +static std::string +disasmpy_part_to_string (const disasm_addr_part_object *addr_part) +{ + string_file buf; + print_address (addr_part->gdbarch, addr_part->address, &buf); + return buf.release (); +} + +/* PARTS is a vector of Python objects, each is a sub-class of + DisassemblerPart. Create a string by concatenating the string + representation of each part, and return this new string. + + Converting an address part requires that we call back into GDB core, + which could throw an exception. As such, calls to this function should + be wrapped with a try/catch. */ + +static std::string +disasmpy_parts_list_to_string (const std::vector<gdbpy_ref<>> &parts) +{ + std::string str; + for (auto p : parts) + { + if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) + { + disasm_text_part_object *text_part + = (disasm_text_part_object *) p.get (); + str += disasmpy_part_to_string (text_part); + } + else + { + gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); + + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) p.get (); + str += disasmpy_part_to_string (addr_part); + } + } + + return str; +} + +/* Initialise OBJ, a DisassemblerResult object with LENGTH and PARTS. OBJ might already have been initialised, in which case any existing - content should be discarded before the new CONTENT is moved in. */ + content should be discarded before the new PARTS are moved in. */ static void disasmpy_init_disassembler_result (disasm_result_object *obj, int length, - std::string content) + std::vector<gdbpy_ref<>> &&parts) { - if (obj->content == nullptr) - obj->content = new string_file; + if (obj->parts == nullptr) + obj->parts = new std::vector<gdbpy_ref<>>; else - obj->content->clear (); + obj->parts->clear (); obj->length = length; - *(obj->content) = std::move (content); + *(obj->parts) = std::move (parts); } /* Implement gdb.disassembler.builtin_disassemble(). 
Calls back into GDB's @@ -375,9 +587,19 @@ disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw) } else { - std::string content = disassembler.release (); - if (!content.empty ()) - PyErr_SetString (gdbpy_gdberror_exc, content.c_str ()); + auto content = disassembler.release (); + std::string str; + + try + { + str = disasmpy_parts_list_to_string (content); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + if (!str.empty ()) + PyErr_SetString (gdbpy_gdberror_exc, str.c_str ()); else PyErr_SetString (gdbpy_gdberror_exc, _("Unknown disassembly error.")); @@ -393,10 +615,10 @@ disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw) gdb_assert (!disassembler.memory_error_address ().has_value ()); /* Create a DisassemblerResult containing the results. */ - std::string content = disassembler.release (); PyTypeObject *type = &disasm_result_object_type; gdbpy_ref<disasm_result_object> res ((disasm_result_object *) type->tp_alloc (type, 0)); + auto content = disassembler.release (); disasmpy_init_disassembler_result (res.get (), length, std::move (content)); return reinterpret_cast<PyObject *> (res.release ()); } @@ -510,6 +732,88 @@ disasmpy_info_progspace (PyObject *self, void *closure) return pspace_to_pspace_object (obj->program_space).release (); } +/* Helper function called when the libopcodes disassembler produces some + output. FORMAT and ARGS are used to create a string which GDB will + display using STYLE. The string is either added as a new + DisassemblerTextPart to the list of parts being built in the current + gdbpy_disassembler object (accessed through DIS_INFO). Or, if the last + part in the gdbpy_disassembler is a text part in the same STYLE, then + the new string is appended to the previous part. 
+ + The merging behaviour make the Python API a little more user friendly, + some disassemblers produce their output character at a time, there's no + particular reason for this, it's just how they are implemented. By + merging parts with the same style we make it easier for the user to + analyse the disassembler output. */ + +int +gdbpy_disassembler::vfprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, + va_list args) noexcept +{ + gdb_disassemble_info *di = (gdb_disassemble_info *) dis_info; + gdbpy_disassembler *dis + = gdb::checked_static_cast<gdbpy_disassembler *> (di); + + if (!dis->m_parts.empty () + && Py_TYPE (dis->m_parts.back ().get ()) == &disasm_text_part_object_type + && (((disasm_text_part_object *) dis->m_parts.back ().get ())->style + == style)) + { + std::string *string + = ((disasm_text_part_object *) dis->m_parts.back ().get ())->string; + string_vappendf (*string, format, args); + } + else + { + std::string str = string_vprintf (format, args); + if (str.size () > 0) + { + gdbpy_ref<> text_part + = make_disasm_text_part (std::move (str), style); + dis->m_parts.emplace_back (std::move (text_part)); + } + } + + /* Something non -ve. */ + return 0; +} + +/* Disassembler callback for architectures where libopcodes doesn't + created styled output. In these cases we format all the output using + the (default) text style. */ + +int +gdbpy_disassembler::fprintf_func (void *dis_info, + const char *format, ...) noexcept +{ + va_list args; + va_start (args, format); + vfprintf_styled_func (dis_info, dis_style_text, format, args); + va_end (args); + + /* Something non -ve. */ + return 0; +} + +/* Disassembler callback for architectures where libopcodes does create + styled output. Just creates a new text part with the given STYLE. */ + +int +gdbpy_disassembler::fprintf_styled_func (void *dis_info, + enum disassembler_style style, + const char *format, ...) 
noexcept +{ + va_list args; + va_start (args, format); + vfprintf_styled_func (dis_info, style, format, args); + va_end (args); + + /* Something non -ve. */ + return 0; +} + /* This implements the disassemble_info read_memory_func callback and is called from the libopcodes disassembler when the disassembler wants to read memory. @@ -615,11 +919,24 @@ disasmpy_result_str (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - gdb_assert (obj->content != nullptr); - gdb_assert (obj->content->size () > 0); + /* These conditions are all enforced when the DisassemblerResult object + is created. */ + gdb_assert (obj->parts != nullptr); + gdb_assert (obj->parts->size () > 0); gdb_assert (obj->length > 0); - return PyUnicode_Decode (obj->content->c_str (), - obj->content->size (), + + std::string str; + + try + { + str = disasmpy_parts_list_to_string (*obj->parts); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + + return PyUnicode_Decode (str.c_str (), str.size (), host_charset (), nullptr); } @@ -642,6 +959,39 @@ disasmpy_result_string (PyObject *self, void *closure) return disasmpy_result_str (self); } +/* Implement DisassemblerResult.parts method. Returns a list of all the + parts that make up this result. There should always be at least one + part, so the returned list should never be empty. */ + +static PyObject * +disasmpy_result_parts (PyObject *self, void *closure) +{ + disasm_result_object *obj = (disasm_result_object *) self; + + /* These conditions are all enforced when the DisassemblerResult object + is created. 
*/ + gdb_assert (obj->parts != nullptr); + gdb_assert (obj->parts->size () > 0); + gdb_assert (obj->length > 0); + + gdbpy_ref<> result_list (PyList_New (obj->parts->size ())); + if (result_list == nullptr) + return nullptr; + Py_ssize_t idx = 0; + for (auto p : *obj->parts) + { + gdbpy_ref<> item = gdbpy_ref<>::new_reference (p.get ()); + PyList_SET_ITEM (result_list.get (), idx, item.release ()); + ++idx; + } + + /* This should follow naturally from the obj->parts list being + non-empty. */ + gdb_assert (PyList_Size (result_list.get()) > 0); + + return result_list.release (); +} + /* Implement DisassemblerResult.__init__. Takes two arguments, an integer, the length in bytes of the disassembled instruction, and a string, the disassembled content of the instruction. */ @@ -649,11 +999,12 @@ disasmpy_result_string (PyObject *self, void *closure) static int disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) { - static const char *keywords[] = { "length", "string", NULL }; + static const char *keywords[] = { "length", "string", "parts", NULL }; int length; - const char *string; - if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, - &length, &string)) + const char *string = nullptr; + PyObject *parts_list = nullptr; + if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "i|zO", keywords, + &length, &string, &parts_list)) return -1; if (length <= 0) @@ -663,17 +1014,85 @@ disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) return -1; } - if (strlen (string) == 0) + if (parts_list == Py_None) + parts_list = nullptr; + + if (string != nullptr && parts_list != nullptr) { - PyErr_SetString (PyExc_ValueError, - _("String must not be empty.")); + PyErr_Format (PyExc_ValueError, + _("Cannot use 'string' and 'parts' when creating %s."), + Py_TYPE (self)->tp_name); return -1; } - disasm_result_object *obj = (disasm_result_object *) self; - disasmpy_init_disassembler_result (obj, length, std::string (string)); + if (string 
!= nullptr) + { + if (strlen (string) == 0) + { + PyErr_SetString (PyExc_ValueError, + _("String must not be empty.")); + return -1; + } + + disasm_result_object *obj = (disasm_result_object *) self; + std::vector<gdbpy_ref<>> content; + gdbpy_ref<> text_part + = make_disasm_text_part (std::string (string), dis_style_text); + content.emplace_back (text_part.release ()); + disasmpy_init_disassembler_result (obj, length, std::move (content)); + } + else + { + /* Neither 'string' nor a usable 'parts' was supplied ('parts' is + reset to nullptr above when it is None). Reject this here, as + PySequence_Check must not be passed a null pointer. */ + if (parts_list == nullptr) + { + PyErr_Format (PyExc_ValueError, + _("Must use one of 'string' or 'parts' when creating %s."), + Py_TYPE (self)->tp_name); + return -1; + } + + if (!PySequence_Check (parts_list)) + { + PyErr_SetString (PyExc_TypeError, + _("'parts' argument is not a sequence")); + return -1; + } + + Py_ssize_t parts_count = PySequence_Size (parts_list); + if (parts_count <= 0) + { + PyErr_SetString (PyExc_ValueError, + _("'parts' list must not be empty.")); + return -1; + } + + disasm_result_object *obj = (disasm_result_object *) self; + std::vector<gdbpy_ref<>> content (parts_count); + + struct gdbarch *gdbarch = nullptr; + for (Py_ssize_t i = 0; i < parts_count; ++i) + { + gdbpy_ref<> part (PySequence_GetItem (parts_list, i)); + + if (part == nullptr) + return -1; + + if (Py_TYPE (part.get ()) == &disasm_addr_part_object_type) + { + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) part.get (); + gdb_assert (addr_part->gdbarch != nullptr); + if (gdbarch == nullptr) + gdbarch = addr_part->gdbarch; + else if (addr_part->gdbarch != gdbarch) + { + PyErr_SetString (PyExc_ValueError, + _("Inconsistent gdb.Architectures used " + "in 'parts' sequence.")); + return -1; + } + } + else if (Py_TYPE (part.get ()) != &disasm_text_part_object_type) + { + /* gdbpy_print_insn asserts that every stored part is either a + text part or an address part, so anything else must be + refused now rather than tripping that assert later. */ + PyErr_SetString (PyExc_TypeError, + _("'parts' sequence must only contain " + "DisassemblerTextPart or " + "DisassemblerAddressPart objects.")); + return -1; + } + + content[i] = std::move (part); + } + + disasmpy_init_disassembler_result (obj, length, std::move (content)); + } return 0; + } /* Implement __repr__ for the DisassemblerResult type. 
*/ @@ -683,12 +1102,12 @@ disasmpy_result_repr (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - gdb_assert (obj->content != nullptr); + gdb_assert (obj->parts != nullptr); + /* PyUnicode_FromFormat's %U does not take ownership of its argument, + so hold the string in a gdbpy_ref to release it after formatting. + NOTE(review): assumes disasmpy_result_str returns a new reference, + or nullptr with a Python error set -- confirm. */ + gdbpy_ref<> str (disasmpy_result_str (self)); + if (str == nullptr) + return nullptr; + - return PyUnicode_FromFormat ("<%s length=%d string=\"%s\">", + return PyUnicode_FromFormat ("<%s length=%d string=\"%U\">", Py_TYPE (obj)->tp_name, obj->length, - obj->content->string ().c_str ()); + str.get ()); } /* Implement memory_error_func callback for disassemble_info. Extract the @@ -712,16 +1131,22 @@ gdbpy_disassembler::print_address_func (bfd_vma addr, { gdbpy_disassembler *dis = static_cast<gdbpy_disassembler *> (info->application_data); - print_address (dis->arch (), addr, dis->stream ()); + + gdbpy_ref<> addr_part + = make_disasm_addr_part (dis->arch (), addr); + dis->m_parts.emplace_back (std::move (addr_part)); } /* constructor. */ gdbpy_disassembler::gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source) - : gdb_printing_disassembler (obj->gdbarch, &m_string_file, - read_memory_func, memory_error_func, - print_address_func), + : gdb_disassemble_info (obj->gdbarch, + read_memory_func, + memory_error_func, + print_address_func, + fprintf_func, + fprintf_styled_func), m_disasm_info_object (obj), m_memory_source (memory_source) { /* Nothing. */ } @@ -932,20 +1357,39 @@ gdbpy_print_insn (struct gdbarch *gdbarch, CORE_ADDR memaddr, return gdb::optional<int> (-1); } - /* Validate the text of the disassembled instruction. */ - gdb_assert (result_obj->content != nullptr); - std::string string (std::move (result_obj->content->release ())); - if (strlen (string.c_str ()) == 0) + /* It is impossible to create a DisassemblerResult object with an empty + parts list. We know that each part results in a non-empty string, so + we know that the instruction disassembly will not be the empty + string. */ + gdb_assert (result_obj->parts->size () > 0); + + /* Now print out the parts that make up this instruction. 
*/ + for (auto &p : *result_obj->parts) { - PyErr_SetString (PyExc_ValueError, - _("String attribute must not be empty.")); - gdbpy_print_stack (); - return gdb::optional<int> (-1); + if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) + { + disasm_text_part_object *text_part + = (disasm_text_part_object *) p.get (); + gdb_assert (text_part->string != nullptr); + info->fprintf_styled_func (info->stream, text_part->style, + "%s", text_part->string->c_str ()); + } + else + { + gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); + disasm_addr_part_object *addr_part + = (disasm_addr_part_object *) p.get (); + /* A DisassemblerAddressPart can only be created by calling a + method on DisassembleInfo, and the gdbarch is copied from the + DisassembleInfo into the DisassemblerAddressPart. As the + DisassembleInfo has its gdbarch initialised from GDBARCH in + this scope, and this architecture can't be changed, then the + following assert should hold. */ + gdb_assert (addr_part->gdbarch == gdbarch); + info->print_address_func (addr_part->address, info); + } } - /* Print the disassembled instruction back to core GDB, and return the - length of the disassembled instruction. */ - info->fprintf_func (info->stream, "%s", string.c_str ()); return gdb::optional<int> (length); } @@ -956,10 +1400,143 @@ static void disasmpy_dealloc_result (PyObject *self) { disasm_result_object *obj = (disasm_result_object *) self; - delete obj->content; + delete obj->parts; Py_TYPE (self)->tp_free (self); } +/* The tp_init callback for the DisassemblerPart type. This just raises an + exception, which prevents the user from creating objects of this type. + Instead the user should create instances of a sub-class. */ + +static int +disasmpy_part_init (PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyErr_SetString (PyExc_RuntimeError, + _("Cannot create instances of DisassemblerPart.")); + return -1; +} + +/* Return a string representing STYLE. 
The returned string is used as a + constant defined in the gdb.disassembler module. */ + +static const char * +get_style_name (enum disassembler_style style) +{ + switch (style) + { + case dis_style_text: return "STYLE_TEXT"; + case dis_style_mnemonic: return "STYLE_MNEMONIC"; + case dis_style_sub_mnemonic: return "STYLE_SUB_MNEMONIC"; + case dis_style_assembler_directive: return "STYLE_ASSEMBLER_DIRECTIVE"; + case dis_style_register: return "STYLE_REGISTER"; + case dis_style_immediate: return "STYLE_IMMEDIATE"; + case dis_style_address: return "STYLE_ADDRESS"; + case dis_style_address_offset: return "STYLE_ADDRESS_OFFSET"; + case dis_style_symbol: return "STYLE_SYMBOL"; + case dis_style_comment_start: return "STYLE_COMMENT_START"; + } + + gdb_assert_not_reached ("unknown disassembler style"); +} + +/* Implement DisassemblerTextPart.__repr__ method. */ + +static PyObject * +disasmpy_text_part_repr (PyObject *self) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + gdb_assert (obj->string != nullptr); + + return PyUnicode_FromFormat ("<%s string='%s', style='%s'>", + Py_TYPE (obj)->tp_name, + obj->string->c_str (), + get_style_name (obj->style)); +} + +/* Implement DisassemblerTextPart.__str__ attribute. */ + +static PyObject * +disasmpy_text_part_str (PyObject *self) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + return PyUnicode_Decode (obj->string->c_str (), obj->string->size (), + host_charset (), nullptr); +} + +/* Implement DisassemblerTextPart.string attribute. */ + +static PyObject * +disasmpy_text_part_string (PyObject *self, void *closure) +{ + return disasmpy_text_part_str (self); +} + +/* Implement DisassemblerTextPart.style attribute. 
*/ + +static PyObject * +disasmpy_text_part_style (PyObject *self, void *closure) +{ + disasm_text_part_object *obj = (disasm_text_part_object *) self; + + LONGEST style_val = (LONGEST) obj->style; + return gdb_py_object_from_longest (style_val).release (); +} + +/* Implement DisassemblerAddressPart.__repr__ method. */ + +static PyObject * +disasmpy_addr_part_repr (PyObject *self) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + return PyUnicode_FromFormat ("<%s address='%s'>", + Py_TYPE (obj)->tp_name, + core_addr_to_string_nz (obj->address)); +} + +/* Implement DisassemblerAddressPart.__str__ attribute. */ + +static PyObject * +disasmpy_addr_part_str (PyObject *self) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + std::string str; + try + { + string_file buf; + print_address (obj->gdbarch, obj->address, &buf); + str = buf.release (); + } + catch (const gdb_exception &except) + { + GDB_PY_HANDLE_EXCEPTION (except); + } + + return PyUnicode_Decode (str.c_str (), str.size (), + host_charset (), nullptr); +} + +/* Implement DisassemblerAddressPart.string attribute. */ + +static PyObject * +disasmpy_addr_part_string (PyObject *self, void *closure) +{ + return disasmpy_addr_part_str (self); +} + +/* Implement DisassemblerAddressPart.address attribute. */ + +static PyObject * +disasmpy_addr_part_address (PyObject *self, void *closure) +{ + disasm_addr_part_object *obj = (disasm_addr_part_object *) self; + + return gdb_py_object_from_longest (obj->address).release (); +} + /* The get/set attributes of the gdb.disassembler.DisassembleInfo type. */ static gdb_PyGetSetDef disasm_info_object_getset[] = { @@ -982,6 +1559,14 @@ Read LEN octets for the instruction to disassemble." }, { "is_valid", disasmpy_info_is_valid, METH_NOARGS, "is_valid () -> Boolean.\n\ Return true if this DisassembleInfo is valid, false if not." 
}, + { "text_part", (PyCFunction) disasmpy_info_make_text_part, + METH_VARARGS | METH_KEYWORDS, + "text_part (STYLE, STRING) -> DisassemblerTextPart\n\ +Create a new text part, with contents STRING styled with STYLE." }, + { "address_part", (PyCFunction) disasmpy_info_make_address_part, + METH_VARARGS | METH_KEYWORDS, + "address_part (ADDRESS) -> DisassemblerAddressPart\n\ +Create a new address part representing ADDRESS." }, {nullptr} /* Sentinel */ }; @@ -992,6 +1577,28 @@ static gdb_PyGetSetDef disasm_result_object_getset[] = { "Length of the disassembled instruction.", nullptr }, { "string", disasmpy_result_string, nullptr, "String representing the disassembled instruction.", nullptr }, + { "parts", disasmpy_result_parts, nullptr, + "List of all the separate disassembly parts", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* The get/set attributes of the gdb.disassembler.DisassemblerTextPart type. */ + +static gdb_PyGetSetDef disasmpy_text_part_getset[] = { + { "string", disasmpy_text_part_string, nullptr, + "String representing a text part.", nullptr }, + { "style", disasmpy_text_part_style, nullptr, + "The style of this text part.", nullptr }, + { nullptr } /* Sentinel */ +}; + +/* The get/set attributes of the gdb.disassembler.DisassemblerAddressPart type. 
*/ + +static gdb_PyGetSetDef disasmpy_addr_part_getset[] = { + { "string", disasmpy_addr_part_string, nullptr, + "String representing an address part.", nullptr }, + { "address", disasmpy_addr_part_address, nullptr, + "The address of this address part.", nullptr }, { nullptr } /* Sentinel */ }; @@ -1046,6 +1653,13 @@ gdbpy_initialize_disasm () PyObject *dict = PyImport_GetModuleDict (); PyDict_SetItemString (dict, "_gdb.disassembler", gdb_disassembler_module); + for (int i = 0; i <= (int) dis_style_comment_start; ++i) + { + const char *style_name = get_style_name ((enum disassembler_style) i); + if (PyModule_AddIntConstant (gdb_disassembler_module, style_name, i) < 0) + return -1; + } + disasm_info_object_type.tp_new = PyType_GenericNew; if (PyType_Ready (&disasm_info_object_type) < 0) return -1; @@ -1062,6 +1676,32 @@ gdbpy_initialize_disasm () (PyObject *) &disasm_result_object_type) < 0) return -1; + disasm_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerPart", + (PyObject *) &disasm_part_object_type) < 0) + return -1; + + disasm_addr_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_addr_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, + "DisassemblerAddressPart", + (PyObject *) &disasm_addr_part_object_type) < 0) + return -1; + + disasm_text_part_object_type.tp_new = PyType_GenericNew; + if (PyType_Ready (&disasm_text_part_object_type) < 0) + return -1; + + if (gdb_pymodule_addobject (gdb_disassembler_module, + "DisassemblerTextPart", + (PyObject *) &disasm_text_part_object_type) < 0) + return -1; + return 0; } @@ -1152,3 +1792,132 @@ PyTypeObject disasm_result_object_type = { disasmpy_result_init, /* tp_init */ 0, /* tp_alloc */ }; + +/* Describe the gdb.disassembler.DisassemblerPart type. 
This type exists + only as an abstract base-class for the various part sub-types. The + init method for this type throws an error. As such we don't bother to + provide a tp_repr method for this parent class. */ + +PyTypeObject disasm_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerPart", /*tp_name*/ + sizeof (PyObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing part of a disassembled instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + disasmpy_part_init, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* Describe the gdb.disassembler.DisassemblerTextPart type. 
*/ + +PyTypeObject disasm_text_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerTextPart", /*tp_name*/ + sizeof (disasm_text_part_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ /* NOTE(review): confirm who frees the object's 'string'. */ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + disasmpy_text_part_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + disasmpy_text_part_str, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing a text part of an instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + disasmpy_text_part_getset, /* tp_getset */ + &disasm_part_object_type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ +}; + +/* Describe the gdb.disassembler.DisassemblerAddressPart type. 
*/ + +PyTypeObject disasm_addr_part_object_type = { + PyVarObject_HEAD_INIT (nullptr, 0) + "gdb.disassembler.DisassemblerAddressPart", /*tp_name*/ + sizeof (disasm_addr_part_object), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + disasmpy_addr_part_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + disasmpy_addr_part_str, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "GDB object, representing an address part of an instruction", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + disasmpy_addr_part_getset, /* tp_getset */ + &disasm_part_object_type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ +}; diff --git a/gdb/testsuite/gdb.python/py-disasm.exp b/gdb/testsuite/gdb.python/py-disasm.exp index 5cbf02f..304393f 100644 --- a/gdb/testsuite/gdb.python/py-disasm.exp +++ b/gdb/testsuite/gdb.python/py-disasm.exp @@ -69,6 +69,12 @@ set nop "(nop|nop\t0)" set unknown_error_pattern "unknown disassembler error \\(error = -1\\)" set addr_pattern "\r\n=> ${curr_pc_pattern} <\[^>\]+>:\\s+" set base_pattern "${addr_pattern}${nop}" + +# Helper proc to format a Python exception of TYPE with MSG. 
+proc make_exception_pattern { type msg } { + return "${::addr_pattern}Python Exception <class '$type'>: $msg\r\n\r\n${::unknown_error_pattern}" +} + set test_plans \ [list \ [list "" "${base_pattern}\r\n.*"] \ @@ -90,13 +96,40 @@ set test_plans \ [list "RethrowMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address $hex"] \ [list "ReadMemoryMemoryErrorDisassembler" "${addr_pattern}Cannot access memory at address ${curr_pc_pattern}"] \ [list "ReadMemoryGdbErrorDisassembler" "${addr_pattern}read_memory raised GdbError\r\n${unknown_error_pattern}"] \ - [list "ReadMemoryRuntimeErrorDisassembler" "${addr_pattern}Python Exception <class 'RuntimeError'>: read_memory raised RuntimeError\r\n\r\n${unknown_error_pattern}"] \ + [list "ReadMemoryRuntimeErrorDisassembler" \ + [make_exception_pattern "RuntimeError" \ + "read_memory raised RuntimeError"]] \ [list "ReadMemoryCaughtMemoryErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ [list "ReadMemoryCaughtGdbErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ [list "ReadMemoryCaughtRuntimeErrorDisassembler" "${addr_pattern}${nop}\r\n.*"] \ - [list "MemorySourceNotABufferDisassembler" "${addr_pattern}Python Exception <class 'TypeError'>: Result from read_memory is not a buffer\r\n\r\n${unknown_error_pattern}"] \ - [list "MemorySourceBufferTooLongDisassembler" "${addr_pattern}Python Exception <class 'ValueError'>: Buffer returned from read_memory is sized $decimal instead of the expected $decimal\r\n\r\n${unknown_error_pattern}"] \ - [list "ResultOfWrongType" "${addr_pattern}Python Exception <class 'TypeError'>: Result is not a DisassemblerResult.\r\n.*"]] + [list "MemorySourceNotABufferDisassembler" \ + [make_exception_pattern "TypeError" \ + "Result from read_memory is not a buffer"]] \ + [list "MemorySourceBufferTooLongDisassembler" \ + [make_exception_pattern "ValueError" \ + "Buffer returned from read_memory is sized $decimal instead of the expected $decimal"]] \ + [list "ResultOfWrongType" \ + 
[make_exception_pattern "TypeError" \ + "Result is not a DisassemblerResult."]] \ + [list "ErrorCreatingTextPart_NoArgs" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'style' \\(pos 1\\)"]] \ + [list "ErrorCreatingAddressPart_NoArgs" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'address' \\(pos 1\\)"]] \ + [list "ErrorCreatingTextPart_NoString" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'string' \\(pos 2\\)"]] \ + [list "ErrorCreatingTextPart_NoStyle" \ + [make_exception_pattern "TypeError" \ + "function missing required argument 'style' \\(pos 1\\)"]] \ + [list "All_Text_Part_Styles" "${addr_pattern}p1p2p3p4p5p6p7p8p9p10\r\n.*"] \ + [list "ErrorCreatingTextPart_StringAndParts" \ + [make_exception_pattern "ValueError" \ + "Cannot use 'string' and 'parts' when creating gdb\\.disassembler\\.DisassemblerResult\\."]] \ + [list "Build_Result_Using_All_Parts" \ + "${addr_pattern}fake\treg, ${curr_pc_pattern}(?: <\[^>\]+>)?, 123\r\n.*"] \ + ] # Now execute each test plan. foreach plan $test_plans { @@ -216,13 +249,48 @@ with_test_prefix "Bad DisassembleInfo creation" { "Error while executing Python code\\."] } -# Test that we can't inherit from the DisassemblerResult class. -gdb_test_multiline "Sub-class a breakpoint" \ - "python" "" \ - "class InvalidResultType(gdb.disassembler.DisassemblerResult):" "" \ - " def __init__(self):" "" \ - " pass" "" \ - "end" \ +# Some of the disassembler related types should not be sub-typed, +# check these now. 
+with_test_prefix "check inheritance" { + foreach_with_prefix type {gdb.disassembler.DisassemblerResult \ + gdb.disassembler.DisassemblerPart + gdb.disassembler.DisassemblerTextPart \ + gdb.disassembler.DisassemblerAddressPart} { + set type_ptn [string_to_regexp $type] + gdb_test_multiline "Sub-class a breakpoint" \ + "python" "" \ + "class InvalidResultType($type):" "" \ + " def __init__(self):" "" \ + " pass" "" \ + "end" \ + [multi_line \ + "TypeError: type '${type_ptn}' is not an acceptable base type" \ + "Error while executing Python code\\."] + } +} + + +# Test some error conditions when creating a DisassemblerResult object. +gdb_test "python result = gdb.disassembler.DisassemblerResult()" \ [multi_line \ - "TypeError: type 'gdb\\.disassembler\\.DisassemblerResult' is not an acceptable base type" \ - "Error while executing Python code\\."] + "TypeError: function missing required argument 'length' \\(pos 1\\)" \ + "Error while executing Python code\\."] \ + "try to create a DisassemblerResult without a length argument" + +foreach len {0 -1} { + gdb_test "python result = gdb.disassembler.DisassemblerResult($len)" \ + [multi_line \ + "ValueError: Length must be greater than 0\\." \ + "Error while executing Python code\\."] \ + "try to create a DisassemblerResult with length $len" +} + +# Check we can't directly create DisassemblerTextPart or +# DisassemblerAddressPart objects. +foreach type {DisassemblerTextPart DisassemblerAddressPart} { + gdb_test "python result = gdb.disassembler.${type}()" \ + [multi_line \ + "RuntimeError: Cannot create instances of DisassemblerPart\\." 
\ + "Error while executing Python code\\."] \ + "try to create an instance of ${type}" +} diff --git a/gdb/testsuite/gdb.python/py-disasm.py b/gdb/testsuite/gdb.python/py-disasm.py index 17a7e75..ec6b0e8 100644 --- a/gdb/testsuite/gdb.python/py-disasm.py +++ b/gdb/testsuite/gdb.python/py-disasm.py @@ -25,6 +25,26 @@ from gdb.disassembler import Disassembler, DisassemblerResult current_pc = None +def builtin_disassemble_wrapper(info): + result = gdb.disassembler.builtin_disassemble(info) + assert result.length > 0 + assert len(result.parts) > 0 + tmp_str = "" + for p in result.parts: + assert(p.string == str(p)) + tmp_str += p.string + assert tmp_str == result.string + return result + + +def check_building_disassemble_result(): + """Check that we can create DisassembleResult objects correctly.""" + + result = gdb.disassembler.DisassemblerResult() + + print("PASS") + + def is_nop(s): return s == "nop" or s == "nop\t0" @@ -70,7 +90,7 @@ class ShowInfoRepr(TestDisassembler): def disassemble(self, info): comment = "\t## " + repr(info) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) string = result.string + comment length = result.length return DisassemblerResult(length=length, string=string) @@ -94,7 +114,7 @@ class ShowInfoSubClassRepr(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) comment = "\t## " + repr(info) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) string = result.string + comment length = result.length return DisassemblerResult(length=length, string=string) @@ -106,7 +126,7 @@ class ShowResultRepr(TestDisassembler): output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) comment = "\t## " + repr(result) string = result.string + comment length = result.length @@ -118,11 +138,11 @@ class ShowResultStr(TestDisassembler): resulting string in a comment 
within the disassembler output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) comment = "\t## " + str(result) string = result.string + comment length = result.length - return DisassemblerResult(length=length, string=string) + return DisassemblerResult(length=length, string=string, parts=None) class GlobalPreInfoDisassembler(TestDisassembler): @@ -138,7 +158,7 @@ class GlobalPreInfoDisassembler(TestDisassembler): if not isinstance(ar, gdb.Architecture): raise gdb.GdbError("invalid architecture type") - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## ad = 0x%x, ar = %s" % (ad, ar.name()) return DisassemblerResult(result.length, text) @@ -148,7 +168,7 @@ class GlobalPostInfoDisassembler(TestDisassembler): """Check the attributes of DisassembleInfo after disassembly has occurred.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) ad = info.address ar = info.architecture @@ -169,7 +189,7 @@ class GlobalReadDisassembler(TestDisassembler): adds them as a comment to the disassembler output.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) len = result.length str = "" for o in range(len): @@ -187,7 +207,7 @@ class GlobalAddrDisassembler(TestDisassembler): """Check the gdb.format_address method.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) arch = info.architecture addr = info.address program_space = info.progspace @@ -214,7 +234,7 @@ class GdbErrorLateDisassembler(TestDisassembler): """Raise a GdbError after calling the builtin disassembler.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) 
raise gdb.GdbError("GdbError after builtin disassembler") @@ -222,7 +242,7 @@ class RuntimeErrorLateDisassembler(TestDisassembler): """Raise a RuntimeError after calling the builtin disassembler.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) raise RuntimeError("RuntimeError after builtin disassembler") @@ -235,7 +255,7 @@ class MemoryErrorEarlyDisassembler(TestDisassembler): info.read_memory(1, -info.address + 2) except gdb.MemoryError: tag = "## AFTER ERROR" - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t" + tag return DisassemblerResult(result.length, text) @@ -245,7 +265,7 @@ class MemoryErrorLateDisassembler(TestDisassembler): before we return a result.""" def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) # The following read will throw an error. 
info.read_memory(1, -info.address + 2) return DisassemblerResult(1, "BAD") @@ -282,7 +302,7 @@ class TaggingDisassembler(TestDisassembler): self._tag = tag def disassemble(self, info): - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## tag = %s" % self._tag return DisassemblerResult(result.length, text) @@ -307,7 +327,7 @@ class GlobalCachingDisassembler(TestDisassembler): and cache the DisassembleInfo so that it is not garbage collected.""" GlobalCachingDisassembler.cached_insn_disas.append(info) GlobalCachingDisassembler.cached_insn_disas.append(self.MyInfo(info)) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) text = result.string + "\t## CACHED" return DisassemblerResult(result.length, text) @@ -373,7 +393,7 @@ class ReadMemoryMemoryErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryGdbErrorDisassembler(TestDisassembler): @@ -389,7 +409,7 @@ class ReadMemoryGdbErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryRuntimeErrorDisassembler(TestDisassembler): @@ -405,7 +425,7 @@ class ReadMemoryRuntimeErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class ReadMemoryCaughtMemoryErrorDisassembler(TestDisassembler): @@ -422,7 +442,7 @@ class ReadMemoryCaughtMemoryErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except gdb.MemoryError: return None @@ -441,7 +461,7 @@ class 
ReadMemoryCaughtGdbErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except gdb.GdbError as e: if e.args[0] == "exception message": return None @@ -462,7 +482,7 @@ class ReadMemoryCaughtRuntimeErrorDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) try: - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) except RuntimeError as e: if e.args[0] == "exception message": return None @@ -479,7 +499,7 @@ class MemorySourceNotABufferDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class MemorySourceBufferTooLongDisassembler(TestDisassembler): @@ -501,7 +521,101 @@ class MemorySourceBufferTooLongDisassembler(TestDisassembler): def disassemble(self, info): info = self.MyInfo(info) - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) + + +class ErrorCreatingTextPart_NoArgs(TestDisassembler): + """Try to create a DisassemblerTextPart with no arguments.""" + + def disassemble(self, info): + part = info.text_part() + return None + + +class ErrorCreatingAddressPart_NoArgs(TestDisassembler): + """Try to create a DisassemblerAddressPart with no arguments.""" + + def disassemble(self, info): + part = info.address_part() + return None + + +class ErrorCreatingTextPart_NoString(TestDisassembler): + """Try to create a DisassemblerTextPart with no string argument.""" + + def disassemble(self, info): + part = info.text_part(gdb.disassembler.STYLE_TEXT) + return None + + +class ErrorCreatingTextPart_NoStyle(TestDisassembler): + """Try to create a DisassemblerTextPart with no style argument.""" + + def disassemble(self, info): + part = info.text_part(string="abc") + return None + + +class 
ErrorCreatingTextPart_StringAndParts(TestDisassembler): + """Try to create a DisassemblerResult with both a string and a parts list.""" + + def disassemble(self, info): + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p1")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p2")) + + return DisassemblerResult(length=4, string="p1p2", parts=parts) + + +class All_Text_Part_Styles(TestDisassembler): + """Create text parts with all styles.""" + + def disassemble(self, info): + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "p1")) + parts.append(info.text_part(gdb.disassembler.STYLE_MNEMONIC, "p2")) + parts.append(info.text_part(gdb.disassembler.STYLE_SUB_MNEMONIC, "p3")) + parts.append(info.text_part(gdb.disassembler.STYLE_ASSEMBLER_DIRECTIVE, "p4")) + parts.append(info.text_part(gdb.disassembler.STYLE_REGISTER, "p5")) + parts.append(info.text_part(gdb.disassembler.STYLE_IMMEDIATE, "p6")) + parts.append(info.text_part(gdb.disassembler.STYLE_ADDRESS, "p7")) + parts.append(info.text_part(gdb.disassembler.STYLE_ADDRESS_OFFSET, "p8")) + parts.append(info.text_part(gdb.disassembler.STYLE_SYMBOL, "p9")) + parts.append(info.text_part(gdb.disassembler.STYLE_COMMENT_START, "p10")) + + result = builtin_disassemble_wrapper(info) + result = DisassemblerResult(length=result.length, parts=parts) + + tmp_str = "" + for p in parts: + assert (p.string == str(p)) + tmp_str += str(p) + assert tmp_str == result.string + + return result + + +class Build_Result_Using_All_Parts(TestDisassembler): + """Disassemble an instruction and return a result that makes use of + text and address parts.""" + + def disassemble(self, info): + global current_pc + + parts = [] + parts.append(info.text_part(gdb.disassembler.STYLE_MNEMONIC, "fake")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, "\t")) + parts.append(info.text_part(gdb.disassembler.STYLE_REGISTER, "reg")) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, ", ")) + 
addr_part = info.address_part(current_pc) + assert addr_part.address == current_pc + parts.append(addr_part) + parts.append(info.text_part(gdb.disassembler.STYLE_TEXT, ", ")) + parts.append(info.text_part(gdb.disassembler.STYLE_IMMEDIATE, "123")) + + result = builtin_disassemble_wrapper(info) + result = DisassemblerResult(length=result.length, parts=parts) + return result class BuiltinDisassembler(Disassembler): @@ -511,7 +625,7 @@ class BuiltinDisassembler(Disassembler): super().__init__("BuiltinDisassembler") def __call__(self, info): - return gdb.disassembler.builtin_disassemble(info) + return builtin_disassemble_wrapper(info) class AnalyzingDisassembler(Disassembler): @@ -606,7 +720,7 @@ class AnalyzingDisassembler(Disassembler): # Override the info object, this provides access to our # read_memory function. info = self.MyInfo(info, self._start, self._end, self._nop_bytes) - result = gdb.disassembler.builtin_disassemble(info) + result = builtin_disassemble_wrapper(info) # Record some information about the first 'nop' instruction we find. if self._nop_index is None and is_nop(result.string): |