diff options
Diffstat (limited to 'bfd')
-rw-r--r-- | bfd/ChangeLog | 35 | ||||
-rw-r--r-- | bfd/elf32-spu.c | 1213 | ||||
-rw-r--r-- | bfd/elf32-spu.h | 5 |
3 files changed, 1206 insertions, 47 deletions
diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 9ecf6f7..b8afd28 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,38 @@ +2008-04-08 Alan Modra <amodra@bigpond.net.au> + + * elf32-spu.c: Include libiberty.h. + (struct spu_link_hash_table): Add local_store, overlay_fixed, reserved, + non_ovly_stub, spu_elf_load_ovl_mgr, spu_elf_open_overlay_script, + spu_elf_relink, auto_overlay fields. + (AUTO_OVERLAY, AUTO_RELINK, OVERLAY_RODATA): Define. + (needs_ovl_stub): Flip test so that call to non-function warning + is emitted during relocate_section rather than earlier. + (spu_elf_check_vma): Stash --auto-overlay parameters, and clear + auto_overlay if no section exceeds local store. + (struct call_info): Add count, max_depth, is_pasted fields. + (struct function_info): Add rodata, last_caller, call_count, + depth, new visit flags. + (insert_callee): Increment call count. + (copy_callee): New function. + (mark_functions_via_relocs): Investigate all reloc types to count + possible function pointer stubs for --auto-overlay. Track + last_caller and increment function call_count. + (pasted_function): Insert a "call" into call info for pasted section. + (remove_cycles): Track max depth of calls. Don't emit call graph + pruning warning for --auto-overlay. + (build_call_tree): Don't transfer_calls for --auto-overlay. + Adjust remove_cycles call. + (sort_calls, sort_lib, sort_bfds): New functions. + (struct _mos_param, struct _uos_param, struct _cl_param): New. + (mark_overlay_section, unmark_overlay_section): New functions. + (collect_lib_sections, auto_ovl_lib_functions): New functions. + (collect_overlays, find_pasted_call): New functions. + (sum_stack): Deal with is_pasted "calls". Exit before printing + when --auto-overlay. + (spu_elf_auto_overlay): New function. + (spu_elf_final_link): Call spu_elf_auto_overlay. + * elf32-spu.h (spu_elf_check_vma): Update prototype. 
+ 2008-04-07 Alan Modra <amodra@bigpond.net.au> * elf32-spu.c (allocate_spuear_stubs): Ensure _SPUEAR_ symbol diff --git a/bfd/elf32-spu.c b/bfd/elf32-spu.c index 85db32f..c65a88b 100644 --- a/bfd/elf32-spu.c +++ b/bfd/elf32-spu.c @@ -19,6 +19,7 @@ 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ #include "sysdep.h" +#include "libiberty.h" #include "bfd.h" #include "bfdlink.h" #include "libbfd.h" @@ -278,6 +279,29 @@ struct spu_link_hash_table /* Total number of overlays. */ unsigned int num_overlays; + /* How much memory we have. */ + unsigned int local_store; + /* Local store --auto-overlay should reserve for non-overlay + functions and data. */ + unsigned int overlay_fixed; + /* Local store --auto-overlay should reserve for stack and heap. */ + unsigned int reserved; + /* Count of overlay stubs needed in non-overlay area. */ + unsigned int non_ovly_stub; + + /* Stash various callbacks for --auto-overlay. */ + void (*spu_elf_load_ovl_mgr) (void); + FILE *(*spu_elf_open_overlay_script) (void); + void (*spu_elf_relink) (void); + + /* Bit 0 set if --auto-overlay. + Bit 1 set if --auto-relink. + Bit 2 set if --overlay-rodata. */ + unsigned int auto_overlay : 3; +#define AUTO_OVERLAY 1 +#define AUTO_RELINK 2 +#define OVERLAY_RODATA 4 + /* Set if we should emit symbols for stubs. */ unsigned int emit_stub_syms:1; @@ -721,7 +745,7 @@ needs_ovl_stub (struct elf_link_hash_entry *h, branch = TRUE; if ((contents[0] & 0xfd) == 0x31 && sym_type != STT_FUNC - && contents == insn) + && contents != insn) { /* It's common for people to write assembly and forget to give function symbols the right type. Handle @@ -1490,15 +1514,33 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms) } /* Check that all loadable section VMAs lie in the range - LO .. HI inclusive. */ + LO .. HI inclusive, and stash some parameters for --auto-overlay. 
*/ asection * -spu_elf_check_vma (struct bfd_link_info *info, bfd_vma lo, bfd_vma hi) +spu_elf_check_vma (struct bfd_link_info *info, + int auto_overlay, + unsigned int lo, + unsigned int hi, + unsigned int overlay_fixed, + unsigned int reserved, + void (*spu_elf_load_ovl_mgr) (void), + FILE *(*spu_elf_open_overlay_script) (void), + void (*spu_elf_relink) (void)) { struct elf_segment_map *m; unsigned int i; + struct spu_link_hash_table *htab = spu_hash_table (info); bfd *abfd = info->output_bfd; + if (auto_overlay & AUTO_OVERLAY) + htab->auto_overlay = auto_overlay; + htab->local_store = hi + 1 - lo; + htab->overlay_fixed = overlay_fixed; + htab->reserved = reserved; + htab->spu_elf_load_ovl_mgr = spu_elf_load_ovl_mgr; + htab->spu_elf_open_overlay_script = spu_elf_open_overlay_script; + htab->spu_elf_relink = spu_elf_relink; + for (m = elf_tdata (abfd)->segment_map; m != NULL; m = m->next) if (m->p_type == PT_LOAD) for (i = 0; i < m->count; i++) @@ -1508,6 +1550,8 @@ spu_elf_check_vma (struct bfd_link_info *info, bfd_vma lo, bfd_vma hi) || m->sections[i]->vma + m->sections[i]->size - 1 > hi)) return m->sections[i]; + /* No need for overlays if it all fits. */ + htab->auto_overlay = 0; return NULL; } @@ -1643,7 +1687,10 @@ struct call_info { struct function_info *fun; struct call_info *next; + unsigned int count; + unsigned int max_depth; unsigned int is_tail : 1; + unsigned int is_pasted : 1; }; struct function_info @@ -1660,21 +1707,34 @@ struct function_info } u; /* Function section. */ asection *sec; + asection *rodata; + /* Where last called from, and number of sections called from. */ + asection *last_caller; + unsigned int call_count; /* Address range of (this part of) function. */ bfd_vma lo, hi; /* Stack usage. */ int stack; + /* Distance from root of call tree. Tail and hot/cold branches + count as one deeper. We aren't counting stack frames here. */ + unsigned int depth; /* Set if global symbol. 
*/ unsigned int global : 1; /* Set if known to be start of function (as distinct from a hunk in hot/cold section. */ unsigned int is_func : 1; - /* Flags used during call tree traversal. */ - unsigned int visit1 : 1; + /* Set if not a root node. */ unsigned int non_root : 1; + /* Flags used during call tree traversal. It's cheaper to replicate + the visit flags than have one which needs clearing after a traversal. */ + unsigned int visit1 : 1; unsigned int visit2 : 1; unsigned int marking : 1; unsigned int visit3 : 1; + unsigned int visit4 : 1; + unsigned int visit5 : 1; + unsigned int visit6 : 1; + unsigned int visit7 : 1; }; struct spu_elf_stack_info @@ -1934,7 +1994,9 @@ find_function (asection *sec, bfd_vma offset, struct bfd_link_info *info) return NULL; } -/* Add CALLEE to CALLER call list if not already present. */ +/* Add CALLEE to CALLER call list if not already present. Return TRUE + if CALLEE was new. If this function return FALSE, CALLEE should + be freed. */ static bfd_boolean insert_callee (struct function_info *caller, struct call_info *callee) @@ -1952,6 +2014,7 @@ insert_callee (struct function_info *caller, struct call_info *callee) p->fun->start = NULL; p->fun->is_func = TRUE; } + p->count += 1; /* Reorder list so most recent call is first. */ *pp = p->next; p->next = caller->call_list; @@ -1959,10 +2022,26 @@ insert_callee (struct function_info *caller, struct call_info *callee) return FALSE; } callee->next = caller->call_list; + callee->count += 1; caller->call_list = callee; return TRUE; } +/* Copy CALL and insert the copy into CALLER. */ + +static bfd_boolean +copy_callee (struct function_info *caller, const struct call_info *call) +{ + struct call_info *callee; + callee = bfd_malloc (sizeof (*callee)); + if (callee == NULL) + return FALSE; + *callee = *call; + if (!insert_callee (caller, callee)) + free (callee); + return TRUE; +} + /* We're only interested in code sections. Testing SEC_IN_MEMORY excludes overlay stub sections. 
*/ @@ -2015,15 +2094,19 @@ mark_functions_via_relocs (asection *sec, Elf_Internal_Sym *sym; struct elf_link_hash_entry *h; bfd_vma val; - unsigned char insn[4]; - bfd_boolean is_call; + bfd_boolean reject, is_call; struct function_info *caller; struct call_info *callee; + reject = FALSE; r_type = ELF32_R_TYPE (irela->r_info); if (r_type != R_SPU_REL16 && r_type != R_SPU_ADDR16) - continue; + { + reject = TRUE; + if (!(call_tree && spu_hash_table (info)->auto_overlay)) + continue; + } r_indx = ELF32_R_SYM (irela->r_info); if (!get_sym_h (&h, &sym, &sym_sec, psyms, r_indx, sec->owner)) @@ -2034,27 +2117,53 @@ mark_functions_via_relocs (asection *sec, || sym_sec->output_section->owner != info->output_bfd) continue; - if (!bfd_get_section_contents (sec->owner, sec, insn, - irela->r_offset, 4)) - return FALSE; - if (!is_branch (insn)) - continue; + is_call = FALSE; + if (!reject) + { + unsigned char insn[4]; - if ((sym_sec->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE)) - != (SEC_ALLOC | SEC_LOAD | SEC_CODE)) + if (!bfd_get_section_contents (sec->owner, sec, insn, + irela->r_offset, 4)) + return FALSE; + if (is_branch (insn)) + { + is_call = (insn[0] & 0xfd) == 0x31; + if ((sym_sec->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE)) + != (SEC_ALLOC | SEC_LOAD | SEC_CODE)) + { + if (!warned) + info->callbacks->einfo + (_("%B(%A+0x%v): call to non-code section" + " %B(%A), analysis incomplete\n"), + sec->owner, sec, irela->r_offset, + sym_sec->owner, sym_sec); + warned = TRUE; + continue; + } + } + else + { + reject = TRUE; + if (!(call_tree && spu_hash_table (info)->auto_overlay) + || is_hint (insn)) + continue; + } + } + + if (reject) { - if (!call_tree) - warned = TRUE; - if (!call_tree || !warned) - info->callbacks->einfo (_("%B(%A+0x%v): call to non-code section" - " %B(%A), stack analysis incomplete\n"), - sec->owner, sec, irela->r_offset, - sym_sec->owner, sym_sec); + /* For --auto-overlay, count possible stubs we need for + function pointer references. 
*/ + unsigned int sym_type; + if (h) + sym_type = h->type; + else + sym_type = ELF_ST_TYPE (sym->st_info); + if (sym_type == STT_FUNC) + spu_hash_table (info)->non_ovly_stub += 1; continue; } - is_call = (insn[0] & 0xfd) == 0x31; - if (h) val = h->root.u.def.value; else @@ -2098,6 +2207,13 @@ mark_functions_via_relocs (asection *sec, if (callee->fun == NULL) return FALSE; callee->is_tail = !is_call; + callee->is_pasted = FALSE; + callee->count = 0; + if (callee->fun->last_caller != sec) + { + callee->fun->last_caller = sec; + callee->fun->call_count += 1; + } if (!insert_callee (caller, callee)) free (callee); else if (!is_call @@ -2168,8 +2284,21 @@ pasted_function (asection *sec, struct bfd_link_info *info) if (l->u.indirect.section == sec) { if (fun_start != NULL) - fun->start = fun_start; - return TRUE; + { + struct call_info *callee = bfd_malloc (sizeof *callee); + if (callee == NULL) + return FALSE; + + fun->start = fun_start; + callee->fun = fun; + callee->is_tail = TRUE; + callee->is_pasted = TRUE; + callee->count = 0; + if (!insert_callee (fun_start, callee)) + free (callee); + return TRUE; + } + break; } if (l->type == bfd_indirect_link_order && (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL @@ -2509,15 +2638,18 @@ mark_non_root (struct function_info *fun, return TRUE; } -/* Remove cycles from the call graph. */ +/* Remove cycles from the call graph. Set depth of nodes. 
*/ static bfd_boolean remove_cycles (struct function_info *fun, struct bfd_link_info *info, - void *param ATTRIBUTE_UNUSED) + void *param) { struct call_info **callp, *call; + unsigned int depth = *(unsigned int *) param; + unsigned int max_depth = depth; + fun->depth = depth; fun->visit2 = TRUE; fun->marking = TRUE; @@ -2526,17 +2658,23 @@ remove_cycles (struct function_info *fun, { if (!call->fun->visit2) { - if (!remove_cycles (call->fun, info, 0)) + call->max_depth = depth + !call->is_pasted; + if (!remove_cycles (call->fun, info, &call->max_depth)) return FALSE; + if (max_depth < call->max_depth) + max_depth = call->max_depth; } else if (call->fun->marking) { - const char *f1 = func_name (fun); - const char *f2 = func_name (call->fun); + if (!spu_hash_table (info)->auto_overlay) + { + const char *f1 = func_name (fun); + const char *f2 = func_name (call->fun); - info->callbacks->info (_("Stack analysis will ignore the call " - "from %s to %s\n"), - f1, f2); + info->callbacks->info (_("Stack analysis will ignore the call " + "from %s to %s\n"), + f1, f2); + } *callp = call->next; free (call); continue; @@ -2544,6 +2682,7 @@ remove_cycles (struct function_info *fun, callp = &call->next; } fun->marking = FALSE; + *(unsigned int *) param = max_depth; return TRUE; } @@ -2553,6 +2692,7 @@ static bfd_boolean build_call_tree (struct bfd_link_info *info) { bfd *ibfd; + unsigned int depth; for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) { @@ -2569,7 +2709,8 @@ build_call_tree (struct bfd_link_info *info) /* Transfer call info from hot/cold section part of function to main entry. */ - if (!for_each_node (transfer_calls, info, 0, FALSE)) + if (!spu_hash_table (info)->auto_overlay + && !for_each_node (transfer_calls, info, 0, FALSE)) return FALSE; /* Find the call graph root(s). */ @@ -2578,7 +2719,526 @@ build_call_tree (struct bfd_link_info *info) /* Remove cycles from the call graph. 
We start from the root node(s) so that we break cycles in a reasonable place. */ - return for_each_node (remove_cycles, info, 0, TRUE); + depth = 0; + return for_each_node (remove_cycles, info, &depth, TRUE); +} + +/* qsort predicate to sort calls by max_depth then count. */ + +static int +sort_calls (const void *a, const void *b) +{ + struct call_info *const *c1 = a; + struct call_info *const *c2 = b; + int delta; + + delta = (*c2)->max_depth - (*c1)->max_depth; + if (delta != 0) + return delta; + + delta = (*c2)->count - (*c1)->count; + if (delta != 0) + return delta; + + return c1 - c2; +} + +struct _mos_param { + unsigned int max_overlay_size; +}; + +/* Set linker_mark and gc_mark on any sections that we will put in + overlays. These flags are used by the generic ELF linker, but we + won't be continuing on to bfd_elf_final_link so it is OK to use + them. linker_mark is clear before we get here. Set segment_mark + on sections that are part of a pasted function (excluding the last + section). + + Set up function rodata section if --overlay-rodata. We don't + currently include merged string constant rodata sections since + + Sort the call graph so that the deepest nodes will be visited + first. */ + +static bfd_boolean +mark_overlay_section (struct function_info *fun, + struct bfd_link_info *info, + void *param) +{ + struct call_info *call; + unsigned int count; + struct _mos_param *mos_param = param; + + if (fun->visit4) + return TRUE; + + fun->visit4 = TRUE; + if (!fun->sec->linker_mark) + { + fun->sec->linker_mark = 1; + fun->sec->gc_mark = 1; + fun->sec->segment_mark = 0; + /* Ensure SEC_CODE is set on this text section (it ought to + be!), and SEC_CODE is clear on rodata sections. We use + this flag to differentiate the two overlay section types. 
*/ + fun->sec->flags |= SEC_CODE; + if (spu_hash_table (info)->auto_overlay & OVERLAY_RODATA) + { + char *name = NULL; + unsigned int size; + + /* Find the rodata section corresponding to this function's + text section. */ + if (strcmp (fun->sec->name, ".text") == 0) + { + name = bfd_malloc (sizeof (".rodata")); + if (name == NULL) + return FALSE; + memcpy (name, ".rodata", sizeof (".rodata")); + } + else if (strncmp (fun->sec->name, ".text.", 6) == 0) + { + size_t len = strlen (fun->sec->name); + name = bfd_malloc (len + 3); + if (name == NULL) + return FALSE; + memcpy (name, ".rodata", sizeof (".rodata")); + memcpy (name + 7, fun->sec->name + 5, len - 4); + } + else if (strncmp (fun->sec->name, ".gnu.linkonce.t.", 16) == 0) + { + size_t len = strlen (fun->sec->name) + 1; + name = bfd_malloc (len); + if (name == NULL) + return FALSE; + memcpy (name, fun->sec->name, len); + name[14] = 'r'; + } + + if (name != NULL) + { + asection *rodata = NULL; + asection *group_sec = elf_section_data (fun->sec)->next_in_group; + if (group_sec == NULL) + rodata = bfd_get_section_by_name (fun->sec->owner, name); + else + while (group_sec != NULL && group_sec != fun->sec) + { + if (strcmp (group_sec->name, name) == 0) + { + rodata = group_sec; + break; + } + group_sec = elf_section_data (group_sec)->next_in_group; + } + fun->rodata = rodata; + if (fun->rodata) + { + fun->rodata->linker_mark = 1; + fun->rodata->gc_mark = 1; + fun->rodata->flags &= ~SEC_CODE; + } + free (name); + } + size = fun->sec->size; + if (fun->rodata) + size += fun->rodata->size; + if (mos_param->max_overlay_size < size) + mos_param->max_overlay_size = size; + } + } + + for (count = 0, call = fun->call_list; call != NULL; call = call->next) + count += 1; + + if (count > 1) + { + struct call_info **calls = bfd_malloc (count * sizeof (*calls)); + if (calls == NULL) + return FALSE; + + for (count = 0, call = fun->call_list; call != NULL; call = call->next) + calls[count++] = call; + + qsort (calls, count, sizeof 
(*calls), sort_calls); + + fun->call_list = NULL; + while (count != 0) + { + --count; + calls[count]->next = fun->call_list; + fun->call_list = calls[count]; + } + free (calls); + } + + for (call = fun->call_list; call != NULL; call = call->next) + { + if (call->is_pasted) + { + /* There can only be one is_pasted call per function_info. */ + BFD_ASSERT (!fun->sec->segment_mark); + fun->sec->segment_mark = 1; + } + if (!mark_overlay_section (call->fun, info, param)) + return FALSE; + } + + /* Don't put entry code into an overlay. The overlay manager needs + a stack! */ + if (fun->lo + fun->sec->output_offset + fun->sec->output_section->vma + == info->output_bfd->start_address) + { + fun->sec->linker_mark = 0; + if (fun->rodata != NULL) + fun->rodata->linker_mark = 0; + } + return TRUE; +} + +struct _uos_param { + asection *exclude_input_section; + asection *exclude_output_section; + unsigned long clearing; +}; + +/* Undo some of mark_overlay_section's work. */ + +static bfd_boolean +unmark_overlay_section (struct function_info *fun, + struct bfd_link_info *info, + void *param) +{ + struct call_info *call; + struct _uos_param *uos_param = param; + unsigned int excluded = 0; + + if (fun->visit5) + return TRUE; + + fun->visit5 = TRUE; + + excluded = 0; + if (fun->sec == uos_param->exclude_input_section + || fun->sec->output_section == uos_param->exclude_output_section) + excluded = 1; + + uos_param->clearing += excluded; + + if (uos_param->clearing) + { + fun->sec->linker_mark = 0; + if (fun->rodata) + fun->rodata->linker_mark = 0; + } + + for (call = fun->call_list; call != NULL; call = call->next) + if (!unmark_overlay_section (call->fun, info, param)) + return FALSE; + + uos_param->clearing -= excluded; + return TRUE; +} + +struct _cl_param { + unsigned int lib_size; + asection **lib_sections; +}; + +/* Add sections we have marked as belonging to overlays to an array + for consideration as non-overlay sections. 
The array consist of + pairs of sections, (text,rodata), for functions in the call graph. */ + +static bfd_boolean +collect_lib_sections (struct function_info *fun, + struct bfd_link_info *info, + void *param) +{ + struct _cl_param *lib_param = param; + struct call_info *call; + unsigned int size; + + if (fun->visit6) + return TRUE; + + fun->visit6 = TRUE; + if (!fun->sec->linker_mark || !fun->sec->gc_mark || fun->sec->segment_mark) + return TRUE; + + size = fun->sec->size; + if (fun->rodata) + size += fun->rodata->size; + if (size > lib_param->lib_size) + return TRUE; + + *lib_param->lib_sections++ = fun->sec; + fun->sec->gc_mark = 0; + if (fun->rodata && fun->rodata->linker_mark && fun->rodata->gc_mark) + { + *lib_param->lib_sections++ = fun->rodata; + fun->rodata->gc_mark = 0; + } + else + *lib_param->lib_sections++ = NULL; + + for (call = fun->call_list; call != NULL; call = call->next) + collect_lib_sections (call->fun, info, param); + + return TRUE; +} + +/* qsort predicate to sort sections by call count. */ + +static int +sort_lib (const void *a, const void *b) +{ + asection *const *s1 = a; + asection *const *s2 = b; + struct _spu_elf_section_data *sec_data; + struct spu_elf_stack_info *sinfo; + int delta; + + delta = 0; + if ((sec_data = spu_elf_section_data (*s1)) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL) + { + int i; + for (i = 0; i < sinfo->num_fun; ++i) + delta -= sinfo->fun[i].call_count; + } + + if ((sec_data = spu_elf_section_data (*s2)) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL) + { + int i; + for (i = 0; i < sinfo->num_fun; ++i) + delta += sinfo->fun[i].call_count; + } + + if (delta != 0) + return delta; + + return s1 - s2; +} + +/* Remove some sections from those marked to be in overlays. Choose + those that are called from many places, likely library functions. 
*/ + +static unsigned int +auto_ovl_lib_functions (struct bfd_link_info *info, unsigned int lib_size) +{ + bfd *ibfd; + asection **lib_sections; + unsigned int i, lib_count; + struct _cl_param collect_lib_param; + struct function_info dummy_caller; + + memset (&dummy_caller, 0, sizeof (dummy_caller)); + lib_count = 0; + for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) + { + extern const bfd_target bfd_elf32_spu_vec; + asection *sec; + + if (ibfd->xvec != &bfd_elf32_spu_vec) + continue; + + for (sec = ibfd->sections; sec != NULL; sec = sec->next) + if (sec->linker_mark + && sec->size < lib_size + && (sec->flags & SEC_CODE) != 0) + lib_count += 1; + } + lib_sections = bfd_malloc (lib_count * 2 * sizeof (*lib_sections)); + if (lib_sections == NULL) + return (unsigned int) -1; + collect_lib_param.lib_size = lib_size; + collect_lib_param.lib_sections = lib_sections; + if (!for_each_node (collect_lib_sections, info, &collect_lib_param, + TRUE)) + return (unsigned int) -1; + lib_count = (collect_lib_param.lib_sections - lib_sections) / 2; + + /* Sort sections so that those with the most calls are first. */ + if (lib_count > 1) + qsort (lib_sections, lib_count, 2 * sizeof (*lib_sections), sort_lib); + + for (i = 0; i < lib_count; i++) + { + unsigned int tmp, stub_size; + asection *sec; + struct _spu_elf_section_data *sec_data; + struct spu_elf_stack_info *sinfo; + + sec = lib_sections[2 * i]; + /* If this section is OK, its size must be less than lib_size. */ + tmp = sec->size; + /* If it has a rodata section, then add that too. */ + if (lib_sections[2 * i + 1]) + tmp += lib_sections[2 * i + 1]->size; + /* Add any new overlay call stubs needed by the section. 
*/ + stub_size = 0; + if (tmp < lib_size + && (sec_data = spu_elf_section_data (sec)) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL) + { + int k; + struct call_info *call; + + for (k = 0; k < sinfo->num_fun; ++k) + for (call = sinfo->fun[k].call_list; call; call = call->next) + if (call->fun->sec->linker_mark) + { + struct call_info *p; + for (p = dummy_caller.call_list; p; p = p->next) + if (p->fun == call->fun) + break; + if (!p) + stub_size += OVL_STUB_SIZE; + } + } + if (tmp + stub_size < lib_size) + { + struct call_info **pp, *p; + + /* This section fits. Mark it as non-overlay. */ + lib_sections[2 * i]->linker_mark = 0; + if (lib_sections[2 * i + 1]) + lib_sections[2 * i + 1]->linker_mark = 0; + lib_size -= tmp + stub_size; + /* Call stubs to the section we just added are no longer + needed. */ + pp = &dummy_caller.call_list; + while ((p = *pp) != NULL) + if (!p->fun->sec->linker_mark) + { + lib_size += OVL_STUB_SIZE; + *pp = p->next; + free (p); + } + else + pp = &p->next; + /* Add new call stubs to dummy_caller. */ + if ((sec_data = spu_elf_section_data (sec)) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL) + { + int k; + struct call_info *call; + + for (k = 0; k < sinfo->num_fun; ++k) + for (call = sinfo->fun[k].call_list; + call; + call = call->next) + if (call->fun->sec->linker_mark) + { + struct call_info *callee; + callee = bfd_malloc (sizeof (*callee)); + if (callee == NULL) + return (unsigned int) -1; + *callee = *call; + if (!insert_callee (&dummy_caller, callee)) + free (callee); + } + } + } + } + while (dummy_caller.call_list != NULL) + { + struct call_info *call = dummy_caller.call_list; + dummy_caller.call_list = call->next; + free (call); + } + for (i = 0; i < 2 * lib_count; i++) + if (lib_sections[i]) + lib_sections[i]->gc_mark = 1; + free (lib_sections); + return lib_size; +} + +/* Build an array of overlay sections. 
The deepest node's section is + added first, the its parent node's section, then everything called + from the parent section. The idea being to group sections to + minimise calls between different overlays. */ + +static bfd_boolean +collect_overlays (struct function_info *fun, + struct bfd_link_info *info, + void *param) +{ + struct call_info *call; + bfd_boolean added_fun; + asection ***ovly_sections = param; + + if (fun->visit7) + return TRUE; + + fun->visit7 = TRUE; + for (call = fun->call_list; call != NULL; call = call->next) + if (!call->is_pasted) + { + if (!collect_overlays (call->fun, info, ovly_sections)) + return FALSE; + break; + } + + added_fun = FALSE; + if (fun->sec->linker_mark && fun->sec->gc_mark) + { + fun->sec->gc_mark = 0; + *(*ovly_sections)++ = fun->sec; + if (fun->rodata && fun->rodata->linker_mark && fun->rodata->gc_mark) + { + fun->rodata->gc_mark = 0; + *(*ovly_sections)++ = fun->rodata; + } + else + *(*ovly_sections)++ = NULL; + added_fun = TRUE; + + /* Pasted sections must stay with the first section. We don't + put pasted sections in the array, just the first section. + Mark subsequent sections as already considered. 
*/ + if (fun->sec->segment_mark) + { + struct function_info *call_fun = fun; + do + { + for (call = call_fun->call_list; call != NULL; call = call->next) + if (call->is_pasted) + { + call_fun = call->fun; + call_fun->sec->gc_mark = 0; + if (call_fun->rodata) + call_fun->rodata->gc_mark = 0; + break; + } + if (call == NULL) + abort (); + } + while (call_fun->sec->segment_mark); + } + } + + for (call = fun->call_list; call != NULL; call = call->next) + if (!collect_overlays (call->fun, info, ovly_sections)) + return FALSE; + + if (added_fun) + { + struct _spu_elf_section_data *sec_data; + struct spu_elf_stack_info *sinfo; + + if ((sec_data = spu_elf_section_data (fun->sec)) != NULL + && (sinfo = sec_data->u.i.stack_info) != NULL) + { + int i; + for (i = 0; i < sinfo->num_fun; ++i) + if (!collect_overlays (&sinfo->fun[i], info, ovly_sections)) + return FALSE; + } + } + + return TRUE; } struct _sum_stack_param { @@ -2598,23 +3258,28 @@ sum_stack (struct function_info *fun, struct function_info *max; size_t stack, cum_stack; const char *f1; + bfd_boolean has_call; struct _sum_stack_param *sum_stack_param = param; + struct spu_link_hash_table *htab; cum_stack = fun->stack; sum_stack_param->cum_stack = cum_stack; if (fun->visit3) return TRUE; + has_call = FALSE; max = NULL; for (call = fun->call_list; call; call = call->next) { + if (!call->is_pasted) + has_call = TRUE; if (!sum_stack (call->fun, info, sum_stack_param)) return FALSE; stack = sum_stack_param->cum_stack; /* Include caller stack for normal calls, don't do so for tail calls. fun->stack here is local stack usage for this function. 
*/ - if (!call->is_tail) + if (!call->is_tail || call->is_pasted || call->fun->start != NULL) stack += fun->stack; if (cum_stack < stack) { @@ -2633,28 +3298,32 @@ sum_stack (struct function_info *fun, && sum_stack_param->overall_stack < cum_stack) sum_stack_param->overall_stack = cum_stack; + htab = spu_hash_table (info); + if (htab->auto_overlay) + return TRUE; + f1 = func_name (fun); if (!fun->non_root) info->callbacks->info (_(" %s: 0x%v\n"), f1, (bfd_vma) cum_stack); info->callbacks->minfo (_("%s: 0x%v 0x%v\n"), f1, (bfd_vma) stack, (bfd_vma) cum_stack); - if (fun->call_list) + if (has_call) { info->callbacks->minfo (_(" calls:\n")); for (call = fun->call_list; call; call = call->next) - { - const char *f2 = func_name (call->fun); - const char *ann1 = call->fun == max ? "*" : " "; - const char *ann2 = call->is_tail ? "t" : " "; + if (!call->is_pasted) + { + const char *f2 = func_name (call->fun); + const char *ann1 = call->fun == max ? "*" : " "; + const char *ann2 = call->is_tail ? "t" : " "; - info->callbacks->minfo (_(" %s%s %s\n"), ann1, ann2, f2); - } + info->callbacks->minfo (_(" %s%s %s\n"), ann1, ann2, f2); + } } if (sum_stack_param->emit_stack_syms) { - struct spu_link_hash_table *htab = spu_hash_table (info); char *name = bfd_malloc (18 + strlen (f1)); struct elf_link_hash_entry *h; @@ -2689,6 +3358,455 @@ sum_stack (struct function_info *fun, return TRUE; } +/* SEC is part of a pasted function. Return the call_info for the + next section of this function. */ + +static struct call_info * +find_pasted_call (asection *sec) +{ + struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec); + struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info; + struct call_info *call; + int k; + + for (k = 0; k < sinfo->num_fun; ++k) + for (call = sinfo->fun[k].call_list; call != NULL; call = call->next) + if (call->is_pasted) + return call; + abort (); + return 0; +} + +/* qsort predicate to sort bfds by file name. 
*/ + +static int +sort_bfds (const void *a, const void *b) +{ + bfd *const *abfd1 = a; + bfd *const *abfd2 = b; + + return strcmp ((*abfd1)->filename, (*abfd2)->filename); +} + +/* Handle --auto-overlay. */ + +static void spu_elf_auto_overlay (struct bfd_link_info *, void (*) (void)) + ATTRIBUTE_NORETURN; + +static void +spu_elf_auto_overlay (struct bfd_link_info *info, + void (*spu_elf_load_ovl_mgr) (void)) +{ + bfd *ibfd; + bfd **bfd_arr; + struct elf_segment_map *m; + unsigned int fixed_size, lo, hi; + struct spu_link_hash_table *htab; + unsigned int base, i, count, bfd_count; + int ovlynum; + asection **ovly_sections, **ovly_p; + FILE *script; + unsigned int total_overlay_size, overlay_size; + struct elf_link_hash_entry *h; + struct _mos_param mos_param; + struct _uos_param uos_param; + struct function_info dummy_caller; + + /* Find the extents of our loadable image. */ + lo = (unsigned int) -1; + hi = 0; + for (m = elf_tdata (info->output_bfd)->segment_map; m != NULL; m = m->next) + if (m->p_type == PT_LOAD) + for (i = 0; i < m->count; i++) + if (m->sections[i]->size != 0) + { + if (m->sections[i]->vma < lo) + lo = m->sections[i]->vma; + if (m->sections[i]->vma + m->sections[i]->size - 1 > hi) + hi = m->sections[i]->vma + m->sections[i]->size - 1; + } + fixed_size = hi + 1 - lo; + + if (!discover_functions (info)) + goto err_exit; + + if (!build_call_tree (info)) + goto err_exit; + + uos_param.exclude_input_section = 0; + uos_param.exclude_output_section + = bfd_get_section_by_name (info->output_bfd, ".interrupt"); + + htab = spu_hash_table (info); + h = elf_link_hash_lookup (&htab->elf, "__ovly_load", + FALSE, FALSE, FALSE); + if (h != NULL + && (h->root.type == bfd_link_hash_defined + || h->root.type == bfd_link_hash_defweak) + && h->def_regular) + { + /* We have a user supplied overlay manager. 
*/ + uos_param.exclude_input_section = h->root.u.def.section; + } + else + { + /* If no user overlay manager, spu_elf_load_ovl_mgr will add our + builtin version to .text, and will adjust .text size. */ + asection *text = bfd_get_section_by_name (info->output_bfd, ".text"); + if (text != NULL) + fixed_size -= text->size; + spu_elf_load_ovl_mgr (); + text = bfd_get_section_by_name (info->output_bfd, ".text"); + if (text != NULL) + fixed_size += text->size; + } + + /* Mark overlay sections, and find max overlay section size. */ + mos_param.max_overlay_size = 0; + if (!for_each_node (mark_overlay_section, info, &mos_param, TRUE)) + goto err_exit; + + /* We can't put the overlay manager or interrupt routines in + overlays. */ + uos_param.clearing = 0; + if ((uos_param.exclude_input_section + || uos_param.exclude_output_section) + && !for_each_node (unmark_overlay_section, info, &uos_param, TRUE)) + goto err_exit; + + bfd_count = 0; + for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) + ++bfd_count; + bfd_arr = bfd_malloc (bfd_count * sizeof (*bfd_arr)); + if (bfd_arr == NULL) + goto err_exit; + + /* Count overlay sections, and subtract their sizes from "fixed_size". */ + count = 0; + bfd_count = 0; + total_overlay_size = 0; + for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next) + { + extern const bfd_target bfd_elf32_spu_vec; + asection *sec; + unsigned int old_count; + + if (ibfd->xvec != &bfd_elf32_spu_vec) + continue; + + old_count = count; + for (sec = ibfd->sections; sec != NULL; sec = sec->next) + if (sec->linker_mark) + { + if ((sec->flags & SEC_CODE) != 0) + count += 1; + fixed_size -= sec->size; + total_overlay_size += sec->size; + } + if (count != old_count) + bfd_arr[bfd_count++] = ibfd; + } + + /* Since the overlay link script selects sections by file name and + section name, ensure that file names are unique. 
*/ + if (bfd_count > 1) + { + bfd_boolean ok = TRUE; + + qsort (bfd_arr, bfd_count, sizeof (*bfd_arr), sort_bfds); + for (i = 1; i < bfd_count; ++i) + if (strcmp (bfd_arr[i - 1]->filename, bfd_arr[i]->filename) == 0) + { + if (bfd_arr[i - 1]->my_archive && bfd_arr[i]->my_archive) + { + if (bfd_arr[i - 1]->my_archive == bfd_arr[i]->my_archive) + info->callbacks->einfo (_("%s duplicated in %s\n"), + bfd_arr[i - 1]->filename, + bfd_arr[i - 1]->my_archive->filename); + else + info->callbacks->einfo (_("%s in both %s and %s\n"), + bfd_arr[i - 1]->filename, + bfd_arr[i - 1]->my_archive->filename, + bfd_arr[i]->my_archive->filename); + } + else if (bfd_arr[i - 1]->my_archive) + info->callbacks->einfo (_("%s in %s and as an object\n"), + bfd_arr[i - 1]->filename, + bfd_arr[i - 1]->my_archive->filename); + else if (bfd_arr[i]->my_archive) + info->callbacks->einfo (_("%s in %s and as an object\n"), + bfd_arr[i]->filename, + bfd_arr[i]->my_archive->filename); + else + info->callbacks->einfo (_("%s duplicated\n"), + bfd_arr[i]->filename); + ok = FALSE; + } + if (!ok) + { + /* FIXME: modify plain object files from foo.o to ./foo.o + and emit EXCLUDE_FILE to handle the duplicates in + archives. There is a pathological case we can't handle: + We may have duplicate file names within a single archive. */ + info->callbacks->einfo (_("sorry, no support for duplicate " + "object files in auto-overlay script\n")); + bfd_set_error (bfd_error_bad_value); + goto err_exit; + } + } + free (bfd_arr); + + if (htab->reserved == 0) + { + struct _sum_stack_param sum_stack_param; + + sum_stack_param.emit_stack_syms = 0; + sum_stack_param.overall_stack = 0; + if (!for_each_node (sum_stack, info, &sum_stack_param, TRUE)) + goto err_exit; + htab->reserved = sum_stack_param.overall_stack; + } + fixed_size += htab->reserved; + fixed_size += htab->non_ovly_stub * OVL_STUB_SIZE; + if (fixed_size + mos_param.max_overlay_size <= htab->local_store) + { + /* Guess number of overlays. 
Assuming overlay buffer is on + average only half full should be conservative. */ + ovlynum = total_overlay_size * 2 / (htab->local_store - fixed_size); + /* Space for _ovly_table[], _ovly_buf_table[] and toe. */ + fixed_size += ovlynum * 16 + 16 + 4 + 16; + } + + if (fixed_size + mos_param.max_overlay_size > htab->local_store) + info->callbacks->einfo (_("non-overlay plus maximum overlay size " + "of 0x%x exceeds local store\n"), + fixed_size + mos_param.max_overlay_size); + + /* Now see if we should put some functions in the non-overlay area. */ + if (fixed_size < htab->overlay_fixed + && htab->overlay_fixed + mos_param.max_overlay_size < htab->local_store) + { + unsigned int lib_size = htab->overlay_fixed - fixed_size; + lib_size = auto_ovl_lib_functions (info, lib_size); + if (lib_size == (unsigned int) -1) + goto err_exit; + fixed_size = htab->overlay_fixed - lib_size; + } + + /* Build an array of sections, suitably sorted to place into + overlays. */ + ovly_sections = bfd_malloc (2 * count * sizeof (*ovly_sections)); + if (ovly_sections == NULL) + goto err_exit; + ovly_p = ovly_sections; + if (!for_each_node (collect_overlays, info, &ovly_p, TRUE)) + goto err_exit; + count = (size_t) (ovly_p - ovly_sections) / 2; + + script = htab->spu_elf_open_overlay_script (); + + if (fprintf (script, "SECTIONS\n{\n OVERLAY :\n {\n") <= 0) + goto file_err; + + memset (&dummy_caller, 0, sizeof (dummy_caller)); + overlay_size = htab->local_store - fixed_size; + base = 0; + ovlynum = 0; + while (base < count) + { + unsigned int size = 0; + unsigned int j; + + for (i = base; i < count; i++) + { + asection *sec; + unsigned int tmp; + unsigned int stub_size; + struct call_info *call, *pasty; + struct _spu_elf_section_data *sec_data; + struct spu_elf_stack_info *sinfo; + int k; + + /* See whether we can add this section to the current + overlay without overflowing our overlay buffer. 
*/ + sec = ovly_sections[2 * i]; + tmp = size + sec->size; + if (ovly_sections[2 * i + 1]) + tmp += ovly_sections[2 * i + 1]->size; + if (tmp > overlay_size) + break; + if (sec->segment_mark) + { + /* Pasted sections must stay together, so add their + sizes too. */ + struct call_info *pasty = find_pasted_call (sec); + while (pasty != NULL) + { + struct function_info *call_fun = pasty->fun; + tmp += call_fun->sec->size; + if (call_fun->rodata) + tmp += call_fun->rodata->size; + for (pasty = call_fun->call_list; pasty; pasty = pasty->next) + if (pasty->is_pasted) + break; + } + } + if (tmp > overlay_size) + break; + + /* If we add this section, we might need new overlay call + stubs. Add any overlay section calls to dummy_call. */ + pasty = NULL; + sec_data = spu_elf_section_data (sec); + sinfo = sec_data->u.i.stack_info; + for (k = 0; k < sinfo->num_fun; ++k) + for (call = sinfo->fun[k].call_list; call; call = call->next) + if (call->is_pasted) + { + BFD_ASSERT (pasty == NULL); + pasty = call; + } + else if (call->fun->sec->linker_mark) + { + if (!copy_callee (&dummy_caller, call)) + goto err_exit; + } + while (pasty != NULL) + { + struct function_info *call_fun = pasty->fun; + pasty = NULL; + for (call = call_fun->call_list; call; call = call->next) + if (call->is_pasted) + { + BFD_ASSERT (pasty == NULL); + pasty = call; + } + else if (!copy_callee (&dummy_caller, call)) + goto err_exit; + } + + /* Calculate call stub size. */ + stub_size = 0; + for (call = dummy_caller.call_list; call; call = call->next) + { + unsigned int k; + + stub_size += OVL_STUB_SIZE; + /* If the call is within this overlay, we won't need a + stub. 
*/ + for (k = base; k < i + 1; k++) + if (call->fun->sec == ovly_sections[2 * k]) + { + stub_size -= OVL_STUB_SIZE; + break; + } + } + if (tmp + stub_size > overlay_size) + break; + + size = tmp; + } + + if (i == base) + { + info->callbacks->einfo (_("%B:%A%s exceeds overlay size\n"), + ovly_sections[2 * i]->owner, + ovly_sections[2 * i], + ovly_sections[2 * i + 1] ? " + rodata" : ""); + bfd_set_error (bfd_error_bad_value); + goto err_exit; + } + + if (fprintf (script, " .ovly%d {\n", ++ovlynum) <= 0) + goto file_err; + for (j = base; j < i; j++) + { + asection *sec = ovly_sections[2 * j]; + + if (fprintf (script, " [%c]%s (%s)\n", + sec->owner->filename[0], + sec->owner->filename + 1, + sec->name) <= 0) + goto file_err; + if (sec->segment_mark) + { + struct call_info *call = find_pasted_call (sec); + while (call != NULL) + { + struct function_info *call_fun = call->fun; + sec = call_fun->sec; + if (fprintf (script, " [%c]%s (%s)\n", + sec->owner->filename[0], + sec->owner->filename + 1, + sec->name) <= 0) + goto file_err; + for (call = call_fun->call_list; call; call = call->next) + if (call->is_pasted) + break; + } + } + } + + for (j = base; j < i; j++) + { + asection *sec = ovly_sections[2 * j + 1]; + if (sec != NULL && fprintf (script, " [%c]%s (%s)\n", + sec->owner->filename[0], + sec->owner->filename + 1, + sec->name) <= 0) + goto file_err; + + sec = ovly_sections[2 * j]; + if (sec->segment_mark) + { + struct call_info *call = find_pasted_call (sec); + while (call != NULL) + { + struct function_info *call_fun = call->fun; + sec = call_fun->rodata; + if (sec != NULL && fprintf (script, " [%c]%s (%s)\n", + sec->owner->filename[0], + sec->owner->filename + 1, + sec->name) <= 0) + goto file_err; + for (call = call_fun->call_list; call; call = call->next) + if (call->is_pasted) + break; + } + } + } + + if (fprintf (script, " }\n") <= 0) + goto file_err; + + while (dummy_caller.call_list != NULL) + { + struct call_info *call = dummy_caller.call_list; + 
dummy_caller.call_list = call->next; + free (call); + } + + base = i; + } + free (ovly_sections); + + if (fprintf (script, " }\n}\nINSERT AFTER .text;\n") <= 0) + goto file_err; + if (fclose (script) != 0) + goto file_err; + + if (htab->auto_overlay & AUTO_RELINK) + htab->spu_elf_relink (); + + xexit (0); + + file_err: + bfd_set_error (bfd_error_system_call); + err_exit: + info->callbacks->einfo ("%F%P: auto overlay error: %E\n"); + xexit (1); +} + /* Provide an estimate of total stack required. */ static bfd_boolean @@ -2723,6 +3841,9 @@ spu_elf_final_link (bfd *output_bfd, struct bfd_link_info *info) { struct spu_link_hash_table *htab = spu_hash_table (info); + if (htab->auto_overlay) + spu_elf_auto_overlay (info, htab->spu_elf_load_ovl_mgr); + if (htab->stack_analysis && !spu_elf_stack_analysis (info, htab->emit_stack_syms)) info->callbacks->einfo ("%X%P: stack analysis error: %E\n"); diff --git a/bfd/elf32-spu.h b/bfd/elf32-spu.h index b7e50a0..e0141d5 100644 --- a/bfd/elf32-spu.h +++ b/bfd/elf32-spu.h @@ -60,4 +60,7 @@ extern int spu_elf_size_stubs (struct bfd_link_info *, void (*) (asection *, asection *, const char *), int); extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int); -extern asection *spu_elf_check_vma (struct bfd_link_info *, bfd_vma, bfd_vma); +extern asection *spu_elf_check_vma (struct bfd_link_info *, int, + unsigned int, unsigned int, unsigned int, + unsigned int, void (*) (void), + FILE *(*) (void), void (*) (void)); |