# Copyright 2010-2013 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # Return true if the target supports DWARF-2 and uses gas. # For now pick a sampling of likely targets. proc dwarf2_support {} { if {[istarget *-*-linux*] || [istarget *-*-gnu*] || [istarget *-*-elf*] || [istarget *-*-openbsd*] || [istarget arm*-*-eabi*] || [istarget arm*-*-symbianelf*] || [istarget powerpc-*-eabi*]} { return 1 } return 0 } # A DWARF assembler. # # All the variables in this namespace are private to the # implementation. Also, any procedure whose name starts with "_" is # private as well. Do not use these. # # Exported functions are documented at their definition. # # In addition to the hand-written functions documented below, this # module automatically generates a function for each DWARF tag. For # most tags, two forms are made: a full name, and one with the # "DW_TAG_" prefix stripped. For example, you can use either # 'DW_TAG_compile_unit' or 'compile_unit' interchangeably. # # There are two exceptions to this rule: DW_TAG_variable and # DW_TAG_namespace. For these, the full name must always be used, # as the short name conflicts with Tcl builtins. (Should future # versions of Tcl or DWARF add more conflicts, this list will grow. # If you want to be safe you should always use the full names.) # # Each tag procedure is defined like: # # proc DW_TAG_mumble {{attrs {}} {children {}}} { ... } # # ATTRS is an optional list of attributes. # It is run through 'subst' in the caller's context before processing. # # Each attribute in the list has one of two forms: # 1. { NAME VALUE } # 2. { NAME VALUE FORM } # # In each case, NAME is the attribute's name. # This can either be the full name, like 'DW_AT_name', or a shortened # name, like 'name'. These are fully equivalent. # # If FORM is given, it should name a DW_FORM_ constant. # This can either be the short form, like 'DW_FORM_addr', or a # shortened version, like 'addr'. If the form is given, VALUE # is its value; see below. In some cases, additional processing # is done; for example, DW_FORM_strp manages the .debug_str # section automatically. # # If FORM is 'SPECIAL_expr', then VALUE is treated as a location # expression. The effective form is then DW_FORM_block, and VALUE # is passed to the (internal) '_location' proc to be translated. # This proc implements a miniature DW_OP_ assembler. # # If FORM is not given, it is guessed: # * If VALUE starts with the "@" character, the rest of VALUE is # looked up as a DWARF constant, and DW_FORM_sdata is used. For # example, '@DW_LANG_c89' could be used. # * If VALUE starts with the ":" character, then it is a label # reference. The rest of VALUE is taken to be the name of a label, # and DW_FORM_ref4 is used. See 'new_label' and 'define_label'. # * Otherwise, VALUE is taken to be a string and DW_FORM_string is # used. # More form-guessing functionality may be added. # # CHILDREN is just Tcl code that can be used to define child DIEs. It # is evaluated in the caller's context. # # Currently this code is missing nice support for CFA handling, and # probably other things as well. namespace eval Dwarf { # True if the module has been initialized. variable _initialized 0 # Constants from dwarf2.h. variable _constants # DW_AT short names. variable _AT # DW_FORM short names. variable _FORM # DW_OP short names. variable _OP # The current output file. variable _output_file # Note: The _cu_ values here also apply to type units (TUs). # Think of a TU as a special kind of CU. # Current CU count. variable _cu_count # The current CU's base label. variable _cu_label # The current CU's version. variable _cu_version # The current CU's address size. variable _cu_addr_size # The current CU's offset size. variable _cu_offset_size # Label generation number. variable _label_num # The deferred output array. The index is the section name; the # contents hold the data for that section. variable _deferred_output # If empty, we should write directly to the output file. # Otherwise, this is the name of a section to write to. variable _defer # The next available abbrev number in the current CU's abbrev # table. variable _abbrev_num # The string table for this assembly. The key is the string; the # value is the label for that string. variable _strings proc _process_one_constant {name value} { variable _constants variable _AT variable _FORM variable _OP set _constants($name) $value if {![regexp "^DW_(\[A-Z\]+)_(\[A-Za-z0-9_\]+)$" $name \ ignore prefix name2]} { error "non-matching name: $name" } if {$name2 == "lo_user" || $name2 == "hi_user"} { return } # We only try to shorten some very common things. # FIXME: CFA? switch -exact -- $prefix { TAG { # Create two procedures for the tag. These call # _handle_DW_TAG with the full tag name baked in; this # does all the actual work. proc $name {{attrs {}} {children {}}} \ "_handle_DW_TAG $name \$attrs \$children" # Filter out ones that are known to clash. if {$name2 == "variable" || $name2 == "namespace"} { set name2 "tag_$name2" } if {[info commands $name2] != {}} { error "duplicate proc name: from $name" } proc $name2 {{attrs {}} {children {}}} \ "_handle_DW_TAG $name \$attrs \$children" } AT { set _AT($name2) $name } FORM { set _FORM($name2) $name } OP { set _OP($name2) $name } default { return } } } proc _read_constants {} { global srcdir hex decimal variable _constants # DWARF name-matching regexp. set dwrx "DW_\[a-zA-Z0-9_\]+" # Whitespace regexp. set ws "\[ \t\]+" set fd [open [file join $srcdir .. .. include dwarf2.h]] while {![eof $fd]} { set line [gets $fd] if {[regexp -- "^${ws}($dwrx)${ws}=${ws}($hex|$decimal),?$" \ $line ignore name value ignore2]} { _process_one_constant $name $value } } close $fd set fd [open [file join $srcdir .. .. include dwarf2.def]] while {![eof $fd]} { set line [gets $fd] if {[regexp -- \ "^DW_\[A-Z_\]+${ws}\\(($dwrx),${ws}($hex|$decimal)\\)$" \ $line ignore name value ignore2]} { _process_one_constant $name $value } } close $fd set _constants(SPECIAL_expr) $_constants(DW_FORM_block) } proc _quote {string} { # FIXME return "\"${string}\\0\"" } proc _handle_DW_FORM {form value} { switch -exact -- $form { DW_FORM_string { _op .ascii [_quote $value] } DW_FORM_flag_present { # We don't need to emit anything. } DW_FORM_data4 - DW_FORM_ref4 { _op .4byte $value } DW_FORM_ref_addr { variable _cu_offset_size variable _cu_version variable _cu_addr_size if {$_cu_version == 2} { set size $_cu_addr_size } else { set size $_cu_offset_size } _op .${size}byte $value } DW_FORM_ref1 - DW_FORM_flag - DW_FORM_data1 { _op .byte $value } DW_FORM_sdata { _op .sleb128 $value } DW_FORM_ref_udata - DW_FORM_udata { _op .uleb128 $value } DW_FORM_addr { variable _cu_addr_size _op .${_cu_addr_size}byte $value } DW_FORM_data2 - DW_FORM_ref2 { _op .2byte $value } DW_FORM_data8 - DW_FORM_ref8 - DW_FORM_ref_sig8 { _op .8byte $value } DW_FORM_strp { variable _strings variable _cu_offset_size if {![info exists _strings($value)]} { set _strings($value) [new_label strp] _defer_output .debug_string { define_label $_strings($value) _op .ascii [_quote $value] } } _op .${_cu_offset_size}byte $_strings($value) "strp: $value" } SPECIAL_expr { set l1 [new_label "expr_start"] set l2 [new_label "expr_end"] _op .uleb128 "$l2 - $l1" "expression" define_label $l1 _location $value define_label $l2 } DW_FORM_block2 - DW_FORM_block4 - DW_FORM_block - DW_FORM_block1 - DW_FORM_ref2 - DW_FORM_indirect - DW_FORM_sec_offset - DW_FORM_exprloc - DW_FORM_GNU_addr_index - DW_FORM_GNU_str_index - DW_FORM_GNU_ref_alt - DW_FORM_GNU_strp_alt - default { error "unhandled form $form" } } } proc _guess_form {value varname} { upvar $varname new_value switch -exact -- [string range $value 0 0] { @ { # Constant reference. variable _constants set new_value $_constants([string range $value 1 end]) # Just the simplest. return DW_FORM_sdata } : { # Label reference. variable _cu_label set new_value "[string range $value 1 end] - $_cu_label" return DW_FORM_ref4 } default { return DW_FORM_string } } } # Map NAME to its canonical form. proc _map_name {name ary} { variable $ary if {[info exists ${ary}($name)]} { set name [set ${ary}($name)] } return $name } proc _handle_DW_TAG {tag_name {attrs {}} {children {}}} { variable _abbrev_num variable _constants set has_children [expr {[string length $children] > 0}] set my_abbrev [incr _abbrev_num] # We somewhat wastefully emit a new abbrev entry for each tag. # There's no reason for this other than laziness. _defer_output .debug_abbrev { _op .uleb128 $my_abbrev "Abbrev start" _op .uleb128 $_constants($tag_name) $tag_name _op .byte $has_children "has_children" } _op .uleb128 $my_abbrev "Abbrev ($tag_name)" foreach attr $attrs { set attr_name [_map_name [lindex $attr 0] _AT] set attr_value [uplevel 2 [list subst [lindex $attr 1]]] if {[llength $attr] > 2} { set attr_form [lindex $attr 2] } else { set attr_form [_guess_form $attr_value attr_value] } set attr_form [_map_name $attr_form _FORM] _handle_DW_FORM $attr_form $attr_value _defer_output .debug_abbrev { _op .uleb128 $_constants($attr_name) $attr_name _op .uleb128 $_constants($attr_form) $attr_form } } _defer_output .debug_abbrev { # Terminator. _op .byte 0x0 Terminator _op .byte 0x0 Terminator } if {$has_children} { uplevel 2 $children # Terminate children. _op .byte 0x0 "Terminate children" } } proc _emit {string} { variable _output_file variable _defer variable _deferred_output if {$_defer == ""} { puts $_output_file $string } else { append _deferred_output($_defer) ${string}\n } } proc _section {name} { _emit " .section $name" } proc _defer_output {section body} { variable _defer variable _deferred_output set old_defer $_defer set _defer $section if {![info exists _deferred_output($_defer)]} { set _deferred_output($_defer) "" _section $section } uplevel $body set _defer $old_defer } proc _defer_to_string {body} { variable _defer variable _deferred_output set old_defer $_defer set _defer temp set _deferred_output($_defer) "" uplevel $body set result $_deferred_output($_defer) unset _deferred_output($_defer) set _defer $old_defer return $result } proc _write_deferred_output {} { variable _output_file variable _deferred_output foreach section [array names _deferred_output] { # The data already has a newline. puts -nonewline $_output_file $_deferred_output($section) } # Save some memory. unset _deferred_output } proc _op {name value {comment ""}} { set text " ${name} ${value}" if {$comment != ""} { # Try to make stuff line up nicely. while {[string length $text] < 40} { append text " " } append text "/* ${comment} */" } _emit $text } proc _compute_label {name} { return ".L${name}" } # Return a name suitable for use as a label. If BASE_NAME is # specified, it is incorporated into the label name; this is to # make debugging the generated assembler easier. If BASE_NAME is # not specified a generic default is used. This proc does not # define the label; see 'define_label'. 'new_label' attempts to # ensure that label names are unique. proc new_label {{base_name label}} { variable _label_num return [_compute_label ${base_name}[incr _label_num]] } # Define a label named NAME. Ordinarily, NAME comes from a call # to 'new_label', but this is not required. proc define_label {name} { _emit "${name}:" } # Declare a global label. This is typically used to refer to # labels defined in other files, for example a function defined in # a .c file. proc extern {args} { foreach name $args { _op .global $name } } # A higher-level interface to label handling. # # ARGS is a list of label descriptors. Each one is either a # single element, or a list of two elements -- a name and some # text. For each descriptor, 'new_label' is invoked. If the list # form is used, the second element in the list is passed as an # argument. The label name is used to define a variable in the # enclosing scope; this can be used to refer to the label later. # The label name is also used to define a new proc whose name is # the label name plus a trailing ":". This proc takes a body as # an argument and can be used to define the label at that point; # then the body, if any, is evaluated in the caller's context. # # For example: # # declare_labels int_label # something { ... $int_label } ;# refer to the label # int_label: constant { ... } ;# define the label proc declare_labels {args} { foreach arg $args { set name [lindex $arg 0] set text [lindex $arg 1] upvar $name label_var if {$text == ""} { set label_var [new_label] } else { set label_var [new_label $text] } proc ${name}: {args} [format { define_label %s uplevel $args } $label_var] } } # This is a miniature assembler for location expressions. It is # suitable for use in the attributes to a DIE. Its output is # prefixed with "=" to make it automatically use DW_FORM_block. # BODY is split by lines, and each line is taken to be a list. # (FIXME should use 'info complete' here.) # Each list's first element is the opcode, either short or long # forms are accepted. # FIXME argument handling # FIXME move docs proc _location {body} { variable _constants foreach line [split $body \n] { if {[lindex $line 0] == ""} { continue } set opcode [_map_name [lindex $line 0] _OP] _op .byte $_constants($opcode) $opcode switch -exact -- $opcode { DW_OP_addr { variable _cu_addr_size _op .${_cu_addr_size}byte [lindex $line 1] } DW_OP_const1u - DW_OP_const1s { _op .byte [lindex $line 1] } DW_OP_const2u - DW_OP_const2s { _op .2byte [lindex $line 1] } DW_OP_const4u - DW_OP_const4s { _op .4byte [lindex $line 1] } DW_OP_const8u - DW_OP_const8s { _op .8byte [lindex $line 1] } DW_OP_constu { _op .uleb128 [lindex $line 1] } DW_OP_consts { _op .sleb128 [lindex $line 1] } default { if {[llength $line] > 1} { error "Unimplemented: operands in location for $opcode" } } } } } # Emit a DWARF CU. # IS_64 is a boolean which is true if you want to emit 64-bit # DWARF, and false for 32-bit DWARF. # VERSION is the DWARF version number to emit. # ADDR_SIZE is the size of addresses in bytes. # BODY is Tcl code that emits the DIEs which make up the body of # the CU. It is evaluated in the caller's context. proc cu {is_64 version addr_size body} { variable _cu_count variable _abbrev_num variable _cu_label variable _cu_version variable _cu_addr_size variable _cu_offset_size set _cu_version $version if {$is_64} { set _cu_offset_size 8 } else { set _cu_offset_size 4 } set _cu_addr_size $addr_size _section .debug_info set cu_num [incr _cu_count] set my_abbrevs [_compute_label "abbrev${cu_num}_begin"] set _abbrev_num 1 set _cu_label [_compute_label "cu${cu_num}_begin"] set start_label [_compute_label "cu${cu_num}_start"] set end_label [_compute_label "cu${cu_num}_end"] define_label $_cu_label if {$is_64} { _op .4byte 0xffffffff _op .8byte "$end_label - $start_label" } else { _op .4byte "$end_label - $start_label" } define_label $start_label _op .2byte $version Version _op .4byte $my_abbrevs Abbrevs _op .byte $addr_size "Pointer size" _defer_output .debug_abbrev { define_label $my_abbrevs } uplevel $body _defer_output .debug_abbrev { # Emit the terminator. _op .byte 0x0 Terminator _op .byte 0x0 Terminator } define_label $end_label } # Emit a DWARF TU. # IS_64 is a boolean which is true if you want to emit 64-bit # DWARF, and false for 32-bit DWARF. # VERSION is the DWARF version number to emit. # ADDR_SIZE is the size of addresses in bytes. # SIGNATURE is the 64-bit signature of the type. # TYPE_LABEL is the label of the type defined by this TU. # BODY is Tcl code that emits the DIEs which make up the body of # the CU. It is evaluated in the caller's context. proc tu {is_64 version addr_size signature type_label body} { variable _cu_count variable _abbrev_num variable _cu_label variable _cu_version variable _cu_addr_size variable _cu_offset_size set _cu_version $version if {$is_64} { set _cu_offset_size 8 } else { set _cu_offset_size 4 } set _cu_addr_size $addr_size _section .debug_types set cu_num [incr _cu_count] set my_abbrevs [_compute_label "abbrev${cu_num}_begin"] set _abbrev_num 1 set _cu_label [_compute_label "cu${cu_num}_begin"] set start_label [_compute_label "cu${cu_num}_start"] set end_label [_compute_label "cu${cu_num}_end"] define_label $_cu_label if {$is_64} { _op .4byte 0xffffffff _op .8byte "$end_label - $start_label" } else { _op .4byte "$end_label - $start_label" } define_label $start_label _op .2byte $version Version _op .4byte $my_abbrevs Abbrevs _op .byte $addr_size "Pointer size" _op .8byte $signature Signature uplevel declare_labels $type_label upvar $type_label my_type_label if {$is_64} { _op .8byte "$my_type_label - $_cu_label" } else { _op .4byte "$my_type_label - $_cu_label" } _defer_output .debug_abbrev { define_label $my_abbrevs } uplevel $body _defer_output .debug_abbrev { # Emit the terminator. _op .byte 0x0 Terminator _op .byte 0x0 Terminator } define_label $end_label } proc _empty_array {name} { upvar $name the_array catch {unset the_array} set the_array(_) {} unset the_array(_) } # The top-level interface to the DWARF assembler. # FILENAME is the name of the file where the generated assembly # code is written. # BODY is Tcl code to emit the assembly. It is evaluated via # "eval" -- not uplevel as you might expect, because it is # important to run the body in the Dwarf namespace. # # A typical invocation is something like: # Dwarf::assemble $file { # cu 0 2 8 { # compile_unit { # ... # } # } # cu 0 2 8 { # ... # } # } proc assemble {filename body} { variable _initialized variable _output_file variable _deferred_output variable _defer variable _label_num variable _strings variable _cu_count if {!$_initialized} { _read_constants set _initialized 1 } set _output_file [open $filename w] set _cu_count 0 _empty_array _deferred_output set _defer "" set _label_num 0 _empty_array _strings # Not "uplevel" here, because we want to evaluate in this # namespace. This is somewhat bad because it means we can't # readily refer to outer variables. eval $body _write_deferred_output catch {close $_output_file} set _output_file {} } }