diff options
Diffstat (limited to 'bfd/bfd.doc')
-rwxr-xr-x | bfd/bfd.doc | 705 |
1 files changed, 705 insertions, 0 deletions
diff --git a/bfd/bfd.doc b/bfd/bfd.doc new file mode 100755 index 0000000..3e3183e --- /dev/null +++ b/bfd/bfd.doc @@ -0,0 +1,705 @@ +This file contains -*- Text -*-. + +BFD is a set of routines for reading and writing binary files. + +The user should call only the interface routines at the end of bfd.h. +The one I'm working out of is /4/gumby/bfd/bfd.h + + Sample "strip" program using BFD: + + #include "bfd.h" + + doit () + { + ibfd = bfd_openr(...) + obfd = bfd_openw(...) + bfd_check_format (ibfd, object); + bfd_set_format (obfd, object); + + bfd_set_arch_mach (obfd, ...) + bfd_set_start_address (obfd, ...) + etc... + + [optionally: + asymbol * foo = malloc (get_symtab_upper_bound (ibfd)); + bfd_canonicalize_symtab (ibfd, foo); + <sort foo, frob foo, etc, using asymbol def from bfd.h> + bfd_set_symtab (obfd, foo, updated_symbol_count); + ] + + bfd_map_over_sections (abfd, setup, NULL); + bfd_map_over_sections (abfd, cleaner, NULL); + + bfd_close (obfd); + bfd_close (ibfd); + } + + setup (ibfd, sect) + { + osect = make_section (obfd, bfd_section_name (ibfd, sect)); + bfd_set_section_size (obfd, osect, bfd_section_size (ibfd, sect)); + ... + } + + cleaner (ibfd, sect) + { + osect = bfd_get_section_by_name (obfd, + bfd_section_name (ibfd, sect)); + bfd_copy_section (ibfd, sect, obfd, osect); + [perhaps: bfd_set_reloc (osect, NULL, 0); ] + } + + + +BFD is a package for manipulating binary files required for developing +programs. It implements a group of structured operations designed to +shield the programmer from the underlying representation of these +binary files. It understands object (compiled) files, archive +libraries, and core files. It is designed to work in a variety of +target environments. + +To use the library, include bfd.h and link with libbfd.a. + +A bfd itself is a representation for a particular file. It is opened +in a manner similar to a file; code then manipulates it rather than +the raw files. 
+ +BFD makes a distinction between TARGETS (families of file formats) and +FORMATS (individual file formats). For instance, the "sun4os4" target +can handle core, object and archive formats of files. The exact +layout of the different formats depends on the target environment. + +The target "default" means the first one known (usually used for +environments that only support one format, or where the common format +is known at compile or link time). The target NULL means the one +specified at runtime in the environment variable GNUTARGET; if that is +null or not defined then the first entry in the target list is chosen +(on output), or all targets are searched (on input) to find a matching +one. + +Most programs should use the target NULL. + +There is a way to get a list of the names of all the targets: +char** bfd_target_list () + This function returns a freshly-malloced list of all the + defined targets (or NULL if it could not malloc). The names + are read-only. You could use this to prompt the user, or + perhaps to error-check. + +char * bfd_format_string (bfd_format format) + This function will give you a printable, single-word description + (like "core" or "archive") for a bfd format. + +Error handling + +General rules: +Functions which are boolean return true on success and false on failure +(unless they're a predicate). Functions which return pointers to +objects return NULL on error. The specifics are documented with each +function. + +If a function fails, you should check the variable bfd_error. If the +value is no_error, then check the C variable errno just as you would +with any other program. The other values bfd_error may take on are +documented in bfd.h. + +If you would prefer a comprehensible string for the error message, use +the function bfd_errmsg: + char * bfd_errmsg (error_tag) +This function returns a read-only string which documents the error +code. If the error code is no_error then it will return a string +depending on the value of errno. 
+ +bfd_perror() is like the perror() function except it understands +bfd_error. + +Operations on bfds themselves + +bfd * bfd_openr (char *filename, char *target); +bfd * bfd_fdopenr (int fd, char *target, char *filename); + + Open a binary file for reading. TARGET is the type of the file, + a char string like "sun4os4" or "elf". (Note this is not the + "function" of the file, e.g. an object versus a core file + versus an archive, but instead describes how all these files + are encoded.) Returns a new bfd or NULL upon failure. + +bfd * bfd_openw (char *filename, char *target); + + Open a file named `filename' for writing. If an existing + file has the same name, then it will be overwritten by a + successful bfd_close on the returned bfd. Will return either + a new bfd or NULL upon failure. + +boolean bfd_close (bfd *abfd); + + Close a BFD opened for either reading or writing. May involve + several filesystem operations, depending on the data format; + some things may not be known to the system until file-closing + time. Returns true if it successfully wrote the file, false + if not. A false return will not leave a partially-written + file behind with the name supplied to bfd_openw. + + On a bfd open for reading will generally successfully + complete. + + It is an error to call this on a file opened from inside an + archive. + + FIXME -- show which error codes may be recoverable and + followed by another call to bfd_close! + + +The defined formats are specified by the enumeration bfd_format. + +boolean bfd_check_format (bfd *abfd, bfd_format format); + + This routine must be called after a bfd_openr. It sets up + internal data structures based on the contents of the file. + It returns FALSE if the file is not really in the specified + format. + +boolean bfd_set_format (bfd *abfd, bfd_format format); + + This routine must be called after a bfd_openw. It sets up + internal data structures for the proper format of file. 
+ It returns FALSE if that format is not supported for output + (e.g. core files). + +The following macros may be used to obtain information about a bfd: + +bfd_get_filename -- returns a pointer to a null-terminated string + which names the bfd's file, or NULL if that is not known. + Don't side-effect this string! +bfd_get_format -- returns the format code for the bfd. +bfd_get_target -- returns the string which names the bfd's target. +bfd_get_mtime -- returns a time_t indicating the modification time of an + input bfd, if that could be determined, or 0 if not. + +Object files have certain properties. For input bfds, these +properties may be read at any time. For output bfds you should set +them before you begin building any sections. + +bfd_vma bfd_get_start_address (bfd *abfd); + + Returns the address in an object file where execution will begin. + +boolean bfd_set_start_address (bfd *abfd, int vma); + + Set the address where execution will start in an object file. + + If the address you select is incorrect for your architecture + (for instance, if it's required to be on a page_boundary and + your supplied starting address is not), then you may get the + invalid_operation error. It is not always possible to + generate an error in this case. + +An object file has an architecture, which is the general instruction +set of the instructions that it contains. Architectures are defined in +enum bfd_architecture in bfd.h. New architectures can be added by +putting them in the enum, updating architectures.c, and adding code to +handle them for the object files that know that architecture. The +bfd_architecture values are not stored in files, but are only used +within the BFD library and its callers. + +An object file also has a machine type, which is the specific machine +within the architecture. For example, if the architecture is bfd_arch_m68k, +the Motorola 68000 series, then the machine type might be 68010, the mc68010 +chip. 
For architectures such as the SPARC where specific versions of +the architecture exist, the version number should probably be used. + +Particular object file formats may or may not store the machine architecture +and type. When copying an object file, you should copy these fields. +Most callers of BFD will not need to know the particular values that +these fields contain, but will instead propagate them from file to file, +or compare the architectures from two files. + +enum bfd_architecture bfd_get_architecture (bfd *abfd); +unsigned long bfd_get_machine (bfd *abfd); + + Get the machine type and architecture. + +boolean bfd_set_arch_mach (bfd *abfd, enum bfd_architecture arch, + unsigned long machine); + + Set the architecture and machine type. The result is true + if the object file can exactly represent the specified type. + The result is false otherwise. + +boolean bfd_arch_compatible (bfd *abfd, bfd *bbfd, + enum bfd_architecture *res_arch, + unsigned long *res_machine); + + Decides whether two BFD's contain compatible architectures and + machine types. If the result is TRUE and the res_arch and + res_machine pointers are non-NULL, the resulting "merged" + architecture and machine type are returned through the pointers. + A linker could call this to decide whether two object files + can be linked, and to determine the arch and machine type of + the resulting file. + +char * bfd_printable_arch_mach (enum bfd_architecture arch, + unsigned long machine); + + Returns a printable string that represents the particular + combination of architecture and machine type. + +boolean bfd_scan_arch_mach (char *string, enum bfd_architecture *archp, + unsigned long *machinep); + + Examines a printable string and tries to extract an + architecture and machine type from it. The intended use is for + parsing specifications from the user, e.g. command line + arguments. 
The result is true if a known architecture was + found, and the resulting architecture and machine type are + stored through the argument pointers. Note that an + architecture scannable by this function might not be + representable by the particular object file format in use. + (i.e. bfd_set_arch_mach might return false). + + +There are also a number of boolean flags which apply to object bfds. + +flagword bfd_get_file_flags (bfd *abfd); + + returns a flagword containing the bfd's flags. + +boolean bfd_set_file_flags (bfd *abfd, flagword flags, + boolean on_or_off); + + sets (on_or_off == true) or clears (on_or_off == false) the flags + specified by flagword. All other flags are unaffected. + Some flag combinations don't make sense; It is not always + possible to detect them (since they may depend on other information). + Returns true if the flags could be modified as requested, + false if not. Upon a false return, no flags will have been + altered. + + +flagword bfd_applicable_file_flags (bfd *abfd); + + returns a flagword with bits set for all the flags which are + meaningful for the bfd. + +The flags are: + HAS_RELOC -- file contains unresolved relocation information. + EXEC_P -- file can be executed. These two may both be on in the + case of some dynamically-linked binaries. + HAS_LINENO -- has line number information. + HAS_DEBUG -- has debugging information. + HAS_SYMS -- has any symbols. + HAS_LOCALS -- has local symbols. + DYNAMIC -- binary is dynamically linked. + WP_TEXT -- text is write-protected + D_PAGED -- binary should be demand-paged + +These flags are one bit wide and may be OR-ed together with |. + +If you are building a large application with bfd there may be data +specific to your program that you may wish to associate with a bfd. +Rather than require you to build a parallel table structure, bfd +provides a void* pointer in each bfd for arbitrary user data. 
The +macro bfd_usrdata (bfd *abfd) extracts these data; you may set them +with = (ie bfd_usrdata (my_bfd) = frob_it (my_bfd, moon_phase);). + +Object and core files have sections. + +File sections are represented by opaque pointers. You may map over +the sections of a file or you may ask for one by name. Note that not +all files may have all the possible sections. + +Section pointers are valid from the time you get them until the bfd +to which they refer is closed. + +When doing output, you must set up all the file's sections before +outputting to any. All that means is that all the file's sections +must have already been created and their size set before output +commences. + +Each section contains some small information, plus three chunks of +data in the object file: contents, relocation, and line numbers. +In some file formats (e.g. a.out), the line number part is always +empty, and line number information (if any) is instead recorded in +the symbol table. + +sec_ptr bfd_get_section_by_name (bfd *abfd, char *name); + Returns a section named NAME, or NULL if none by that name + exists. Works on input and output bfds. + +sec_ptr bfd_make_section (bfd *abfd, char *name); + Creates a section named name in the output bfd abfd. + returns NULL if it cannot create the section (if, for instance, + the output format does not permit such a section). If a + section with that name already exists, it is returned; a new + one with the same name is NOT created. + +unsigned int bfd_count_sections (bfd *abfd) + + This function returns the number of sections in the bfd abfd. + +void bfd_map_over_sections (bfd *abfd, void (*operation)(), + void *user_storage); + + This is how you operate on all sections of an input file. + Pass in a function pointer. The function will be called for each + section of the file, in random order. It will be passed + three arguments: the bfd, the sec_ptr for the section, and + whatever was passed in as user_storage. 
+ +char * bfd_section_name (bfd *abfd, sec_ptr ptr); + + Produces the name of a section, e.g. ".text" or ".data". + This will produce arbitrary names for files with extensible + section names (e.g. COFF, ELF) so don't assume that you will + only see a few values here. + +long bfd_section_size (bfd *abfd, sec_ptr ptr); + + The size of a section in bytes. Result == -1 for error. + +boolean bfd_set_section_size (bfd *abfd, sec_ptr section, unsigned long size); + + Set the size of a section. This must be done before any data + transfer is done for the section. + +bfd_vma bfd_section_vma (bfd *abfd, sec_ptr ptr); + + Virtual memory address where a section "belongs". + +boolean bfd_set_section_vma (bfd *abfd, bfd_vma vma); + + Set the virtual memory address of a section. + +int bfd_get_section_alignment (bfd *abfd, sec_ptr ptr); + + returns the alignment of a section. If alignment is not + possible, return value is undefined. + +boolean bfd_set_section_alignment (bfd *abfd, sec_ptr ptr, int alignment) + + returns true if it can set the section to the requested value. + Alignment is an integer; it refers to the power of two + specifying the byte boundary we want (ie 0 is byte-aligned; 4 + is word aligned). If the requested alignment is not available + any existing value is unchanged. + +Sections have properties just as object files may: + +flagword bfd_get_section_flags (bfd *abfd, sec_ptr section); + + returns a flagword containing the section's flags. + +boolean bfd_set_section_flags (bfd *abfd, sec_ptr section, + flagword flags, boolean on_or_off); + + sets (on_or_off == true) or clears (on_or_off == false) the flags + specified by flagword. All other flags are unaffected. + Some flag combinations don't make sense; It is not always + possible to detect them (since they may depend on other information). + Returns true if the flags could be modified as requested, + false if not. Upon a false return, no flags will have been + altered. 
+ +flagword bfd_applicable_section_flags (bfd *abfd); + + returns a flagword with bits set for all the flags which are + meaningful for a section. + +The flags are: + + SEC_BALIGN -- segment can be byte-aligned. + SEC_RELOC -- segment should be relocated. + SEC_ALLOC -- when converted into a memory image with the intent of + constructing a runnable process, memory space will be + allocated for this section. + SEC_LOAD -- when converted into a memory image with the intent of + constructing a runnable process, section contents will be + copied from the object file into memory. When this flag + is set, SEC_ALLOC is guaranteed to also be set. + SEC_HAS_CONTENTS -- The contents of this section exist in the + object file. Sections whose contents do not exist in the + object file may still have their contents read. On read, + a segment filled with zeroes will be invented to satisfy + the read request. It is an error to attempt to set the + contents of a section that has no contents. + +These last three probably need some explanation. In a traditional, +native unix object format, there are three real sections, text, data, +and bss. The text section will be allocated memory on exec, and will +be loaded from file into memory on exec. So the flags for a +traditional unix text section would typically be at least (SEC_ALLOC | +SEC_LOAD | SEC_HAS_CONTENTS). The data section has basically these +same traits. The bss section, however is a little different. It is +not relocated, and it is not loaded from file on exec, but it is +allocated memory on exec. Thus, its flags would be more like +(SEC_ALLOC). It is possible to have a section which is the converse +of the bss section. That is, (SEC_HAS_CONTENTS & ~SEC_ALLOC). This +could be anything from profiling information or notes from one pass of +a toolchain to another to time and version stamp information. + +Note that the section flags currently lack information on position +dependence. 
+ +boolean bfd_get_section_contents (bfd *abfd, sec_ptr section, + unsigned char *location, + int offset, int count); + + Stores count bytes from the section's contents starting at + offset from within those contents. The values are stored into + location. Returns true if it could do so. Supplying invalid + values for offset and count will produce unpredictable results. + +boolean bfd_set_section_contents (bfd *abfd, sec_ptr section, + unsigned char *location, + int offset, int count); + Stores count bytes from location into offset within the + section contents. You need not write all the contents contiguously + (that is, you may write words 5-7 followed by 0-4 if you + wish). However once you start writing into a section, any + other sections into which you have previously written are + considered finished, and you may not write in them any more. + +*** Line numbers *** + +bfd_get_section_lineno_size (bfd *abfd, sec_ptr section); + Returns how many bytes of line numbers are associated with this + section. + +bfd_set_section_lineno_size (bfd *abfd, sec_ptr section, unsigned long val); + Sets the number of bytes of line numbers that this section should + contain. + +boolean bfd_get_section_linenos (bfd *abfd, sec_ptr section, + unsigned char *location, + int offset, int count); + Same as get_section_contents, except that it works on the linenos + for this section. + +boolean bfd_set_section_linenos (bfd *abfd, sec_ptr section, + unsigned char *location, + int offset, int count); + Same as set_section_contents, except that it works on the linenos + for this section. + +As with files, you may associate arbitrary program-specific data with +a section of a bfd. The following two functions are provided for +manipulating these data: + +void * bfd_get_section_userdata (bfd *abfd, sec_ptr section) + Returns whatever was stored in section's user data, or NULL if nothing. 
+ +boolean bfd_set_section_userdata (bfd *abfd, sec_ptr section, void *contents) + Set the section contents. Returns true if it can, false if not. + +Core files + +Core files are currently only supported for reading. + +Apart from opening them, looking at the various sections (generally +the .data, .stack, and .regs sections; maybe a .user_struct section +eventually), you can make some queries about the status of the core +file, detailed below. The ".regs" section contains the general and +floating point registers of the process that died, in some machine- +specific order and format "intended to be unsurprising to someone who +knows the machine". + +char * bfd_core_file_failing_command (bfd *abfd); + + The command name of the program that failed, creating the core file. + The result is NULL if BFD can't figure out what the failing command was. + +int bfd_core_file_failing_signal (bfd *abfd); + + The signal number which caused the program to die, causing the + core file to be created. It will be positive if valid. + +boolean core_file_matches_executable_p (bfd *core_bfd, bfd *exec_bfd); + + For debuggers, checks whether a core file "matches" (is likely to + have come from) an executable file. This will not be perfect on + most systems, but will just provide a way to reject gross mismatches. + +Archives. + +An archive is a special file which can contain other files. +Originally it was intended to be a general way to group files, the way +tar is today. But now it is used almost exclusively to hold object +files. + +An archive may be opened for reading or writing just like any other +bfd. Once it is open for reading you may obtain bfds for each of the +files contained within it with the following function: + +bfd * bfd_openr_next_archived_file (bfd *arch_bfd, bfd *last_file); + + If called with NULL as the second argument, returns the first + file contained in the archive arch_bfd. 
If called with a file + contained within arch_bfd, returns the one which follows that + one, or NULL if it was the last. Returns NULL also if the + bfd supplied as last_file did not come from the archive arch_bfd. + +Any bfd open for read may be placed in an output archive. When the +output archive is closed, the contents will be placed into the +archive. + +You control the order of files in an archive. You set the first one +with the following function: + +boolean bfd_set_archive_head (bfd *output_archive, bfd *new_head) + + This function sets the first file in the archive + output_archive to be the bfd new_head. + +bfd's contain a pointer called next, which is bfd *. It is used by +bfd_close when an archive is closed to decide which file should next +go into the archive. So to place a group of files into an archive, +open bfds for each of them, chain them together using the next pointer +in the order you desire (be sure to store NULL into the final one's +next pointer), then do bfd_set_archive_head with the head of the +chain. The next pointer may be freely smashed at any time; it is only +looked at when closing an output archive. + +bfds for files contained within archives are normal bfds; you can do +any input operations on them that you can do with a normal bfd. + +bfd_my_archive is a macro which takes an input bfd and returns NULL if +it lives in the filesystem and a bfd if it is contained in an archive. +In the latter case, the returned bfd is the archive itself. + +Archives containing only object files may have a "map" -- a table in +the front which maps external symbols to the files which contain them. + +Archive maps will refer only to object files; if an archive contains a +file which is not an object file that file will of course not appear in +the map. + +boolean bfd_has_map (bfd *archive_bfd) + + This macro takes a bfd of an archive and returns true or + false depending on whether the bfd has a map. 
For output + bfds this may be set to true or false, depending on whether + you want the map to be maintained or not. For some targets, + setting this to false will cause no map to be generated; for + others it will merely cause an empty map to be created, since + a map is required by that target. + +For archives with maps you may use the following function: + +int bfd_get_next_mapent (bfd *abfd, int prev, char **name) + + You may use this to step through all the entries in the archive + map. Supply BFD_NO_MORE_SYMBOLS as the 'prev' entry to get the + first entry; then use successive returned values from this + function to get the succeeding ones. The name of the next entry + will be stored through the pointer name. + + This function returns BFD_NO_MORE_SYMBOLS when there are no more + entries or on error. + +bfd * bfd_get_elt_at_index (abfd, int index) + + This function takes an index as returned by bfd_get_next_mapent + and returns the bfd which corresponds to that entry. Returns NULL + on error. + +Symbol and relocation information. + +Symbol-table information is the area of greatest incompatibility. +bfd has a canonical symbol representation; all formats are parsed into +and out of it. + +Note that canonicalize_symtab takes a pointer to an array of pointers +to canonical symbols. This is necessary so that the end of the array +can be marked with NULL. You may shuffle the pointers and you may +clobber the symbol contents. But don't move the symbols themselves. + +unsigned int bfd_get_symtab_upper_bound (bfd *abfd); + + Returns the maximum number of bytes that would be taken by + the output of canonicalize_symtab. Returns 0 on error. + +unsigned int bfd_canonicalize_symtab (bfd *abfd, asymbol **location); + + Produces a symbol table in canonical format at LOCATION, which + must be of size specified by get_symtab_upper_bound bytes. + Not all those bytes may be used. Returns the number of + symbol pointers written. Returns 0 upon error. 
+ +boolean bfd_set_symtab (bfd *outbfd, asymbol **location, + unsigned int symcount); + + Takes a generic symbol table and an output bfd. Used to set + the symbol table for an output bfd. Do not change the table + after using this function (although the storage may be + reclaimed once the bfd has been closed). + +If you're done with the symbol table you can tell bfd about it by +calling bfd_reclaim_symbol_table, which takes a bfd. Calling this +function will also reclaim any relocation entries you may have +requested. If you don't use this function bfd will keep around all +symbol information until the bfd is closed. + +Similarly, relocations have a canonical format. See the file bfd.h for +the exact definition. It is similar to the sun-4 relocation format. +Please note that: +o - Each relocation has a pointer to a generic symbol. +o - Not all values of reloc_type are supported for all targets. There + is a bitvector which explains which are; you can index into it by + relocation type. The macro which extracts it is bfd_valid_reloc_types. + +Since relocation information is saved on a per-section basis, the +interface is slightly different from that of the symbol table: + +unsigned int get_reloc_upper_bound (bfd *abfd, sec_ptr asect); + + Returns the maximum number of bytes that would be taken by + the output of canonicalize_reloc. Returns 0 on error. + +unsigned int canonicalize_reloc (bfd *abfd, sec_ptr asect, arelent *location); + + Produces a relocation table in canonical format at LOCATION, + which must be of size specified by get_reloc_upper_bound + bytes. Not all those bytes may be used. Returns the number + of entries written. Returns 0 upon error. + +boolean bfd_set_reloc (bfd *outbfd, sec_ptr asect, arelent *location, + unsigned int count); + + Takes a generic reloc table and an output bfd. Used to set + the reloc table for an output bfd. 
Do not change the table + after using this function (although the storage may be + reclaimed once the bfd has been closed). + +Byte-swapping + +Unfortunately, not all machines have the same byte order. Worse, +storage layout is in general highly machine-dependent. Although bfd +can hide that from you in most cases, it cannot do so with the section +contents, since they are totally uninterpreted. Hence you must +byte-swap those data yourself. This is not usually much of an issue +since you should just generate your data in the correct byte order. + +[THIS IS WRONG AND ALSO DOES NOT REFLECT THE CODE WHICH IS CORRECT] + +Fortunately, bfd can tell if byte-swapping or realignment is required +at all! The macro bfd_bit_twiddle_required takes a pointer to a bfd +and returns true if byte-swapping is required, false if not. + +However if you don't wish to check this you may just use the following +functions which will do the conversions required: + + +long bfd_getlong (bfd *abfd, unsigned char *ptr); + bfd_putlong (bfd *abfd, unsigned char *ptr, long time); + +short bfd_getshort (bfd *abfd, unsigned char *ptr); + bfd_putshort (bfd *abfd, unsigned char *ptr, short stop); + + These functions take a pointer that points to data which is, + or will be, part of a section contents. They extract numbers + from the data, or insert numbers into the data. The argument + or result is in the host's number format; the data stored at + the pointer or retrieved from it is in the target's number format. + Typically this transfer is either a no-op or is a byte-swap; + sometimes it involves an access to a "misaligned" location from + the host's point of view. |