aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Modra <amodra@gmail.com>2008-01-28 05:59:24 +0000
committerAlan Modra <amodra@gmail.com>2008-01-28 05:59:24 +0000
commit47f6dab9a3fb353b0faca52fcc07d2f57c4d906c (patch)
tree9cc0fcaf801807941794b51c5a6726a21692d0b6
parent8693ff0ffb451be8a54dceda84bfc78246271dd9 (diff)
downloadfsf-binutils-gdb-47f6dab9a3fb353b0faca52fcc07d2f57c4d906c.zip
fsf-binutils-gdb-47f6dab9a3fb353b0faca52fcc07d2f57c4d906c.tar.gz
fsf-binutils-gdb-47f6dab9a3fb353b0faca52fcc07d2f57c4d906c.tar.bz2
Rewrite SPU overlay handling code. Put overlay call stubs in the
overlays where possible. Use a faster call stub, or optionally at compile time, a more compact stub. Double size of _ovly_buf_table so that low bit of _ovly_table.buf can be used as a "present" bit. Reserve an extra _ovly_table entry for index zero.
-rw-r--r--bfd/ChangeLog43
-rw-r--r--bfd/elf32-spu.c1144
-rw-r--r--bfd/elf32-spu.h27
-rw-r--r--ld/ChangeLog12
-rw-r--r--ld/emultempl/spu_ovl.S884
-rw-r--r--ld/emultempl/spu_ovl.obin1432 -> 1524 bytes
-rw-r--r--ld/emultempl/spuelf.em47
-rw-r--r--ld/testsuite/ChangeLog5
-rw-r--r--ld/testsuite/ld-spu/ovl.d213
-rw-r--r--ld/testsuite/ld-spu/ovl2.d90
10 files changed, 1160 insertions, 1305 deletions
diff --git a/bfd/ChangeLog b/bfd/ChangeLog
index bfdc204..e95e50f 100644
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@@ -1,3 +1,46 @@
+2008-01-28 Alan Modra <amodra@bigpond.net.au>
+
+ Rewrite SPU overlay handling code. Put overlay call stubs in the
+ overlays where possible. Use a faster call stub, or optionally at
+ compile time, a more compact stub. Double size of _ovly_buf_table
+ so that low bit of _ovly_table.buf can be used as a "present" bit.
+ Reserve an extra _ovly_table entry for index zero.
+ * elf32-spu.c (struct spu_link_hash_table): Delete a number of fields,
+ add new ones.
+ (struct got_entry): New.
+ (struct spu_stub_hash_entry): Delete.
+ (stub_hash_newfunc, spu_elf_link_hash_table_free): Delete.
+ (spu_elf_link_hash_table_create): Adjust for struct changes.
+ (spu_stub_name): Delete.
+ (spu_elf_find_overlays): Don't track sections from overlay regions.
+ Instead set ovl_buf number in spu elf section data. Error if
+ overlays in one region don't start at the same address. Adjust
+ for struct _spu_elf_section_data changes.
+ (SIZEOF_STUB1, SIZEOF_STUB2, ILA_79, ILA_78): Delete.
+ (OVL_STUB_SIZE, BRSL, LNOP, ILA): Define.
+ (needs_ovl_stub): Adjust for struct _spu_elf_section_data changes.
+ (enum _insn_type): New.
+ (count_stub, build_stub): New functions.
+ (allocate_spuear_stubs): Use count_stub.
+ (build_spuear_stubs): Use build_stub.
+ (populate_stubs, sort_stubs): Delete.
+ (process_stubs): New function.
+ (write_one_stub): Delete.
+ (spu_elf_size_stubs, spu_elf_build_stubs): Rewrite.
+ (alloc_stack_info): Adjust for struct _spu_elf_section_data changes.
+ (maybe_insert_function, check_function_ranges): Likewise.
+ (find_function, pasted_function, build_call_tree): Likewise.
+ (spu_elf_stack_analysis, spu_elf_modify_segment_map): Likewise.
+ (spu_elf_modify_program_headers): Likewise.
+ (interesting_section): Detect stub sections differently. Delete
+ htab param, adjust all callers.
+ (spu_elf_relocate_section): Rewrite stub handling.
+ (spu_elf_output_symbol_hook): Likewise.
+ (bfd_elf32_bfd_link_hash_table_free): Delete.
+ * elf32-spu.h (struct _spu_elf_section_data): Move input-only and
+ output-only fields into a union. Add ovl_buf.
+ (spu_elf_size_stubs, spu_elf_build_stubs): Update prototypes.
+
2008-01-25 DJ Delorie <dj@redhat.com>
* elf32-m32c.c (_bfd_m32c_elf_eh_frame_address_size): New.
diff --git a/bfd/elf32-spu.c b/bfd/elf32-spu.c
index 62a922a..d9ba935 100644
--- a/bfd/elf32-spu.c
+++ b/bfd/elf32-spu.c
@@ -257,31 +257,21 @@ struct spu_link_hash_table
{
struct elf_link_hash_table elf;
- /* The stub hash table. */
- struct bfd_hash_table stub_hash_table;
-
- /* Sorted array of stubs. */
- struct {
- struct spu_stub_hash_entry **sh;
- unsigned int count;
- int err;
- } stubs;
-
/* Shortcuts to overlay sections. */
- asection *stub;
asection *ovtab;
+ asection *toe;
+ asection **ovl_sec;
+
+ /* Count of stubs in each overlay section. */
+ unsigned int *stub_count;
+
+ /* The stub section for each overlay section. */
+ asection **stub_sec;
struct elf_link_hash_entry *ovly_load;
+ struct elf_link_hash_entry *ovly_return;
unsigned long ovly_load_r_symndx;
- /* An array of two output sections per overlay region, chosen such that
- the first section vma is the overlay buffer vma (ie. the section has
- the lowest vma in the group that occupy the region), and the second
- section vma+size specifies the end of the region. We keep pointers
- to sections like this because section vmas may change when laying
- them out. */
- asection **ovl_region;
-
/* Number of overlay buffers. */
unsigned int num_buf;
@@ -296,7 +286,7 @@ struct spu_link_hash_table
unsigned int non_overlay_stubs : 1;
/* Set on error. */
- unsigned int stub_overflow : 1;
+ unsigned int stub_err : 1;
/* Set if stack size analysis should be done. */
unsigned int stack_analysis : 1;
@@ -305,54 +295,17 @@ struct spu_link_hash_table
unsigned int emit_stack_syms : 1;
};
-#define spu_hash_table(p) \
- ((struct spu_link_hash_table *) ((p)->hash))
+/* Hijack the generic got fields for overlay stub accounting. */
-struct spu_stub_hash_entry
+struct got_entry
{
- struct bfd_hash_entry root;
-
- /* Destination of this stub. */
- asection *target_section;
- bfd_vma target_off;
-
- /* Offset of entry in stub section. */
- bfd_vma off;
-
- /* Offset from this stub to stub that loads the overlay index. */
- bfd_vma delta;
+ struct got_entry *next;
+ unsigned int ovl;
+ bfd_vma stub_addr;
};
-/* Create an entry in a spu stub hash table. */
-
-static struct bfd_hash_entry *
-stub_hash_newfunc (struct bfd_hash_entry *entry,
- struct bfd_hash_table *table,
- const char *string)
-{
- /* Allocate the structure if it has not already been allocated by a
- subclass. */
- if (entry == NULL)
- {
- entry = bfd_hash_allocate (table, sizeof (struct spu_stub_hash_entry));
- if (entry == NULL)
- return entry;
- }
-
- /* Call the allocation method of the superclass. */
- entry = bfd_hash_newfunc (entry, table, string);
- if (entry != NULL)
- {
- struct spu_stub_hash_entry *sh = (struct spu_stub_hash_entry *) entry;
-
- sh->target_section = NULL;
- sh->target_off = 0;
- sh->off = 0;
- sh->delta = 0;
- }
-
- return entry;
-}
+#define spu_hash_table(p) \
+ ((struct spu_link_hash_table *) ((p)->hash))
/* Create a spu ELF linker hash table. */
@@ -373,28 +326,16 @@ spu_elf_link_hash_table_create (bfd *abfd)
return NULL;
}
- /* Init the stub hash table too. */
- if (!bfd_hash_table_init (&htab->stub_hash_table, stub_hash_newfunc,
- sizeof (struct spu_stub_hash_entry)))
- return NULL;
-
- memset (&htab->stubs, 0,
- sizeof (*htab) - offsetof (struct spu_link_hash_table, stubs));
+ memset (&htab->ovtab, 0,
+ sizeof (*htab) - offsetof (struct spu_link_hash_table, ovtab));
+ htab->elf.init_got_refcount.refcount = 0;
+ htab->elf.init_got_refcount.glist = NULL;
+ htab->elf.init_got_offset.offset = 0;
+ htab->elf.init_got_offset.glist = NULL;
return &htab->elf.root;
}
-/* Free the derived linker hash table. */
-
-static void
-spu_elf_link_hash_table_free (struct bfd_link_hash_table *hash)
-{
- struct spu_link_hash_table *ret = (struct spu_link_hash_table *) hash;
-
- bfd_hash_table_free (&ret->stub_hash_table);
- _bfd_generic_link_hash_table_free (hash);
-}
-
/* Find the symbol for the given R_SYMNDX in IBFD and set *HP and *SYMP
to (hash, NULL) for global symbols, and (NULL, sym) for locals. Set
*SYMSECP to the symbol's section. *LOCSYMSP caches local syms. */
@@ -480,51 +421,6 @@ get_sym_h (struct elf_link_hash_entry **hp,
return TRUE;
}
-/* Build a name for an entry in the stub hash table. We can't use a
- local symbol name because ld -r might generate duplicate local symbols. */
-
-static char *
-spu_stub_name (const asection *sym_sec,
- const struct elf_link_hash_entry *h,
- const Elf_Internal_Rela *rel)
-{
- char *stub_name;
- bfd_size_type len;
-
- if (h)
- {
- len = strlen (h->root.root.string) + 1 + 8 + 1;
- stub_name = bfd_malloc (len);
- if (stub_name == NULL)
- return stub_name;
-
- sprintf (stub_name, "%s+%x",
- h->root.root.string,
- (int) rel->r_addend & 0xffffffff);
- len -= 8;
- }
- else
- {
- len = 8 + 1 + 8 + 1 + 8 + 1;
- stub_name = bfd_malloc (len);
- if (stub_name == NULL)
- return stub_name;
-
- sprintf (stub_name, "%x:%x+%x",
- sym_sec->id & 0xffffffff,
- (int) ELF32_R_SYM (rel->r_info) & 0xffffffff,
- (int) rel->r_addend & 0xffffffff);
- len = strlen (stub_name);
- }
-
- if (stub_name[len - 2] == '+'
- && stub_name[len - 1] == '0'
- && stub_name[len] == 0)
- stub_name[len - 2] = 0;
-
- return stub_name;
-}
-
/* Create the note section if not already present. This is done early so
that the linker maps the sections to the right place in the output. */
@@ -634,9 +530,7 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info)
qsort (alloc_sec, n, sizeof (*alloc_sec), sort_sections);
/* Look for overlapping vmas. Any with overlap must be overlays.
- Count them. Also count the number of overlay regions and for
- each region save a section from that region with the lowest vma
- and another section with the highest end vma. */
+ Count them. Also count the number of overlay regions. */
ovl_end = alloc_sec[0]->vma + alloc_sec[0]->size;
for (ovl_index = 0, num_buf = 0, i = 1; i < n; i++)
{
@@ -645,19 +539,24 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info)
{
asection *s0 = alloc_sec[i - 1];
- if (spu_elf_section_data (s0)->ovl_index == 0)
+ if (spu_elf_section_data (s0)->u.o.ovl_index == 0)
{
- spu_elf_section_data (s0)->ovl_index = ++ovl_index;
- alloc_sec[num_buf * 2] = s0;
- alloc_sec[num_buf * 2 + 1] = s0;
- num_buf++;
+ alloc_sec[ovl_index] = s0;
+ spu_elf_section_data (s0)->u.o.ovl_index = ++ovl_index;
+ spu_elf_section_data (s0)->u.o.ovl_buf = ++num_buf;
}
- spu_elf_section_data (s)->ovl_index = ++ovl_index;
- if (ovl_end < s->vma + s->size)
+ alloc_sec[ovl_index] = s;
+ spu_elf_section_data (s)->u.o.ovl_index = ++ovl_index;
+ spu_elf_section_data (s)->u.o.ovl_buf = num_buf;
+ if (s0->vma != s->vma)
{
- ovl_end = s->vma + s->size;
- alloc_sec[num_buf * 2 - 1] = s;
+ info->callbacks->einfo (_("%X%P: overlay sections %A and %A "
+ "do not start at the same address.\n"),
+ s0, s);
+ return FALSE;
}
+ if (ovl_end < s->vma + s->size)
+ ovl_end = s->vma + s->size;
}
else
ovl_end = s->vma + s->size;
@@ -665,30 +564,22 @@ spu_elf_find_overlays (bfd *output_bfd, struct bfd_link_info *info)
htab->num_overlays = ovl_index;
htab->num_buf = num_buf;
- if (ovl_index == 0)
- {
- free (alloc_sec);
- return FALSE;
- }
-
- alloc_sec = bfd_realloc (alloc_sec, num_buf * 2 * sizeof (*alloc_sec));
- if (alloc_sec == NULL)
- return FALSE;
-
- htab->ovl_region = alloc_sec;
- return TRUE;
+ htab->ovl_sec = alloc_sec;
+ return ovl_index != 0;
}
-/* One of these per stub. */
-#define SIZEOF_STUB1 8
-#define ILA_79 0x4200004f /* ila $79,function_address */
-#define BR 0x32000000 /* br stub2 */
-
-/* One of these per overlay. */
-#define SIZEOF_STUB2 8
-#define ILA_78 0x4200004e /* ila $78,overlay_number */
- /* br __ovly_load */
+/* Support two sizes of overlay stubs, a slower more compact stub of two
+ instructions, and a faster stub of four instructions. */
+#ifndef OVL_STUB_SIZE
+/* Default to faster. */
+#define OVL_STUB_SIZE 16
+/* #define OVL_STUB_SIZE 8 */
+#endif
+#define BRSL 0x33000000
+#define BR 0x32000000
#define NOP 0x40200000
+#define LNOP 0x00200000
+#define ILA 0x42000000
/* Return true for all relative and absolute branch instructions.
bra 00110000 0..
@@ -757,14 +648,14 @@ needs_ovl_stub (const char *sym_name,
return TRUE;
/* Usually, symbols in non-overlay sections don't need stubs. */
- if (spu_elf_section_data (sym_sec->output_section)->ovl_index == 0
+ if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index == 0
&& !htab->non_overlay_stubs)
return FALSE;
/* A reference from some other section to a symbol in an overlay
section needs a stub. */
- if (spu_elf_section_data (sym_sec->output_section)->ovl_index
- != spu_elf_section_data (input_section->output_section)->ovl_index)
+ if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index
+ != spu_elf_section_data (input_section->output_section)->u.o.ovl_index)
return TRUE;
/* If this insn isn't a branch then we are possibly taking the
@@ -772,6 +663,229 @@ needs_ovl_stub (const char *sym_name,
return !is_branch;
}
+enum _insn_type { non_branch, branch, call };
+
+static bfd_boolean
+count_stub (struct spu_link_hash_table *htab,
+ bfd *ibfd,
+ asection *isec,
+ enum _insn_type insn_type,
+ struct elf_link_hash_entry *h,
+ const Elf_Internal_Rela *irela)
+{
+ unsigned int ovl = 0;
+ struct got_entry *g, **head;
+
+ /* If this instruction is a branch or call, we need a stub
+ for it. One stub per function per overlay.
+ If it isn't a branch, then we are taking the address of
+ this function so need a stub in the non-overlay area
+ for it. One stub per function. */
+ if (insn_type != non_branch)
+ ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index;
+
+ if (h != NULL)
+ head = &h->got.glist;
+ else
+ {
+ if (elf_local_got_ents (ibfd) == NULL)
+ {
+ bfd_size_type amt = (elf_tdata (ibfd)->symtab_hdr.sh_info
+ * sizeof (*elf_local_got_ents (ibfd)));
+ elf_local_got_ents (ibfd) = bfd_zmalloc (amt);
+ if (elf_local_got_ents (ibfd) == NULL)
+ return FALSE;
+ }
+ head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info);
+ }
+
+ /* If we have a stub in the non-overlay area then there's no need
+ for one in overlays. */
+ g = *head;
+ if (g != NULL && g->ovl == 0)
+ return TRUE;
+
+ if (ovl == 0)
+ {
+ struct got_entry *gnext;
+
+ /* Need a new non-overlay area stub. Zap other stubs. */
+ for (; g != NULL; g = gnext)
+ {
+ htab->stub_count[g->ovl] -= 1;
+ gnext = g->next;
+ free (g);
+ }
+ }
+ else
+ {
+ for (; g != NULL; g = g->next)
+ if (g->ovl == ovl)
+ break;
+ }
+
+ if (g == NULL)
+ {
+ g = bfd_malloc (sizeof *g);
+ if (g == NULL)
+ return FALSE;
+ g->ovl = ovl;
+ g->stub_addr = (bfd_vma) -1;
+ g->next = *head;
+ *head = g;
+
+ htab->stub_count[ovl] += 1;
+ }
+
+ return TRUE;
+}
+
+/* Two instruction overlay stubs look like:
+
+ brsl $75,__ovly_load
+ .word target_ovl_and_address
+
+ target_ovl_and_address is a word with the overlay number in the top 14 bits
+ and local store address in the bottom 18 bits.
+
+ Four instruction overlay stubs look like:
+
+ ila $78,ovl_number
+ lnop
+ ila $79,target_address
+ br __ovly_load */
+
+static bfd_boolean
+build_stub (struct spu_link_hash_table *htab,
+ bfd *ibfd,
+ asection *isec,
+ enum _insn_type insn_type,
+ struct elf_link_hash_entry *h,
+ const Elf_Internal_Rela *irela,
+ bfd_vma dest,
+ asection *dest_sec)
+{
+ unsigned int ovl;
+ struct got_entry *g, **head;
+ asection *sec;
+ bfd_vma val, from, to;
+
+ ovl = 0;
+ if (insn_type != non_branch)
+ ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index;
+
+ if (h != NULL)
+ head = &h->got.glist;
+ else
+ head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info);
+
+ g = *head;
+ if (g != NULL && g->ovl == 0 && ovl != 0)
+ return TRUE;
+
+ for (; g != NULL; g = g->next)
+ if (g->ovl == ovl)
+ break;
+ if (g == NULL)
+ abort ();
+
+ if (g->stub_addr != (bfd_vma) -1)
+ return TRUE;
+
+ sec = htab->stub_sec[ovl];
+ dest += dest_sec->output_offset + dest_sec->output_section->vma;
+ from = sec->size + sec->output_offset + sec->output_section->vma;
+ g->stub_addr = from;
+ to = (htab->ovly_load->root.u.def.value
+ + htab->ovly_load->root.u.def.section->output_offset
+ + htab->ovly_load->root.u.def.section->output_section->vma);
+ val = to - from;
+ if (OVL_STUB_SIZE == 16)
+ val -= 12;
+ if (((dest | to | from) & 3) != 0
+ || val + 0x20000 >= 0x40000)
+ {
+ htab->stub_err = 1;
+ return FALSE;
+ }
+ ovl = spu_elf_section_data (dest_sec->output_section)->u.o.ovl_index;
+
+ if (OVL_STUB_SIZE == 16)
+ {
+ bfd_put_32 (sec->owner, ILA + ((ovl << 7) & 0x01ffff80) + 78,
+ sec->contents + sec->size);
+ bfd_put_32 (sec->owner, LNOP,
+ sec->contents + sec->size + 4);
+ bfd_put_32 (sec->owner, ILA + ((dest << 7) & 0x01ffff80) + 79,
+ sec->contents + sec->size + 8);
+ bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
+ sec->contents + sec->size + 12);
+ }
+ else if (OVL_STUB_SIZE == 8)
+ {
+ bfd_put_32 (sec->owner, BRSL + ((val << 5) & 0x007fff80) + 75,
+ sec->contents + sec->size);
+
+ val = (dest & 0x3ffff) | (ovl << 14);
+ bfd_put_32 (sec->owner, val,
+ sec->contents + sec->size + 4);
+ }
+ else
+ abort ();
+ sec->size += OVL_STUB_SIZE;
+
+ if (htab->emit_stub_syms)
+ {
+ size_t len;
+ char *name;
+ int add;
+
+ len = 8 + sizeof (".ovl_call.") - 1;
+ if (h != NULL)
+ len += strlen (h->root.root.string);
+ else
+ len += 8 + 1 + 8;
+ add = 0;
+ if (irela != NULL)
+ add = (int) irela->r_addend & 0xffffffff;
+ if (add != 0)
+ len += 1 + 8;
+ name = bfd_malloc (len);
+ if (name == NULL)
+ return FALSE;
+
+ sprintf (name, "%08x.ovl_call.", g->ovl);
+ if (h != NULL)
+ strcpy (name + 8 + sizeof (".ovl_call.") - 1, h->root.root.string);
+ else
+ sprintf (name + 8 + sizeof (".ovl_call.") - 1, "%x:%x",
+ dest_sec->id & 0xffffffff,
+ (int) ELF32_R_SYM (irela->r_info) & 0xffffffff);
+ if (add != 0)
+ sprintf (name + len - 9, "+%x", add);
+
+ h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
+ free (name);
+ if (h == NULL)
+ return FALSE;
+ if (h->root.type == bfd_link_hash_new)
+ {
+ h->root.type = bfd_link_hash_defined;
+ h->root.u.def.section = sec;
+ h->root.u.def.value = sec->size - OVL_STUB_SIZE;
+ h->size = OVL_STUB_SIZE;
+ h->type = STT_FUNC;
+ h->ref_regular = 1;
+ h->def_regular = 1;
+ h->ref_regular_nonweak = 1;
+ h->forced_local = 1;
+ h->non_elf = 0;
+ }
+ }
+
+ return TRUE;
+}
+
/* Called via elf_link_hash_traverse to allocate stubs for any _SPUEAR_
symbols. */
@@ -786,103 +900,47 @@ allocate_spuear_stubs (struct elf_link_hash_entry *h, void *inf)
&& strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
{
struct spu_link_hash_table *htab = inf;
- static Elf_Internal_Rela zero_rel;
- char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel);
- struct spu_stub_hash_entry *sh;
-
- if (stub_name == NULL)
- {
- htab->stubs.err = 1;
- return FALSE;
- }
-
- sh = (struct spu_stub_hash_entry *)
- bfd_hash_lookup (&htab->stub_hash_table, stub_name, TRUE, FALSE);
- if (sh == NULL)
- {
- free (stub_name);
- return FALSE;
- }
-
- /* If this entry isn't new, we already have a stub. */
- if (sh->target_section != NULL)
- {
- free (stub_name);
- return TRUE;
- }
- sh->target_section = h->root.u.def.section;
- sh->target_off = h->root.u.def.value;
- htab->stubs.count += 1;
+ count_stub (htab, NULL, NULL, non_branch, h, NULL);
}
return TRUE;
}
-/* Called via bfd_hash_traverse to set up pointers to all symbols
- in the stub hash table. */
-
static bfd_boolean
-populate_stubs (struct bfd_hash_entry *bh, void *inf)
+build_spuear_stubs (struct elf_link_hash_entry *h, void *inf)
{
- struct spu_link_hash_table *htab = inf;
+ /* Symbols starting with _SPUEAR_ need a stub because they may be
+ invoked by the PPU. */
+ if ((h->root.type == bfd_link_hash_defined
+ || h->root.type == bfd_link_hash_defweak)
+ && h->def_regular
+ && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
+ {
+ struct spu_link_hash_table *htab = inf;
- htab->stubs.sh[--htab->stubs.count] = (struct spu_stub_hash_entry *) bh;
+ build_stub (htab, NULL, NULL, non_branch, h, NULL,
+ h->root.u.def.value, h->root.u.def.section);
+ }
+
return TRUE;
}
-/* qsort predicate to sort stubs by overlay number. */
+/* Size or build stubs. */
-static int
-sort_stubs (const void *a, const void *b)
-{
- const struct spu_stub_hash_entry *const *sa = a;
- const struct spu_stub_hash_entry *const *sb = b;
- int i;
- bfd_signed_vma d;
-
- i = spu_elf_section_data ((*sa)->target_section->output_section)->ovl_index;
- i -= spu_elf_section_data ((*sb)->target_section->output_section)->ovl_index;
- if (i != 0)
- return i;
-
- d = ((*sa)->target_section->output_section->vma
- + (*sa)->target_section->output_offset
- + (*sa)->target_off
- - (*sb)->target_section->output_section->vma
- - (*sb)->target_section->output_offset
- - (*sb)->target_off);
- if (d != 0)
- return d < 0 ? -1 : 1;
-
- /* Two functions at the same address. Aliases perhaps. */
- i = strcmp ((*sb)->root.string, (*sa)->root.string);
- BFD_ASSERT (i != 0);
- return i;
-}
-
-/* Allocate space for overlay call and return stubs. */
-
-bfd_boolean
-spu_elf_size_stubs (bfd *output_bfd,
- struct bfd_link_info *info,
- int non_overlay_stubs,
- int stack_analysis,
- asection **stub,
- asection **ovtab,
- asection **toe)
+static bfd_boolean
+process_stubs (bfd *output_bfd,
+ struct bfd_link_info *info,
+ bfd_boolean build)
{
struct spu_link_hash_table *htab = spu_hash_table (info);
bfd *ibfd;
- unsigned i, group;
- flagword flags;
- htab->non_overlay_stubs = non_overlay_stubs;
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
{
extern const bfd_target bfd_elf32_spu_vec;
Elf_Internal_Shdr *symtab_hdr;
- asection *section;
+ asection *isec;
Elf_Internal_Sym *local_syms = NULL;
void *psyms;
@@ -896,37 +954,36 @@ spu_elf_size_stubs (bfd *output_bfd,
/* Arrange to read and keep global syms for later stack analysis. */
psyms = &local_syms;
- if (stack_analysis)
+ if (htab->stack_analysis)
psyms = &symtab_hdr->contents;
/* Walk over each section attached to the input bfd. */
- for (section = ibfd->sections; section != NULL; section = section->next)
+ for (isec = ibfd->sections; isec != NULL; isec = isec->next)
{
Elf_Internal_Rela *internal_relocs, *irelaend, *irela;
/* If there aren't any relocs, then there's nothing more to do. */
- if ((section->flags & SEC_RELOC) == 0
- || (section->flags & SEC_ALLOC) == 0
- || (section->flags & SEC_LOAD) == 0
- || section->reloc_count == 0)
+ if ((isec->flags & SEC_RELOC) == 0
+ || (isec->flags & SEC_ALLOC) == 0
+ || (isec->flags & SEC_LOAD) == 0
+ || isec->reloc_count == 0)
continue;
/* If this section is a link-once section that will be
discarded, then don't create any stubs. */
- if (section->output_section == NULL
- || section->output_section->owner != output_bfd)
+ if (isec->output_section == NULL
+ || isec->output_section->owner != output_bfd)
continue;
/* Get the relocs. */
- internal_relocs
- = _bfd_elf_link_read_relocs (ibfd, section, NULL, NULL,
- info->keep_memory);
+ internal_relocs = _bfd_elf_link_read_relocs (ibfd, isec, NULL, NULL,
+ info->keep_memory);
if (internal_relocs == NULL)
goto error_ret_free_local;
/* Now examine each relocation. */
irela = internal_relocs;
- irelaend = irela + section->reloc_count;
+ irelaend = irela + isec->reloc_count;
for (; irela < irelaend; irela++)
{
enum elf_spu_reloc_type r_type;
@@ -935,10 +992,8 @@ spu_elf_size_stubs (bfd *output_bfd,
Elf_Internal_Sym *sym;
struct elf_link_hash_entry *h;
const char *sym_name;
- char *stub_name;
- struct spu_stub_hash_entry *sh;
unsigned int sym_type;
- enum _insn_type { non_branch, branch, call } insn_type;
+ enum _insn_type insn_type;
r_type = ELF32_R_TYPE (irela->r_info);
r_indx = ELF32_R_SYM (irela->r_info);
@@ -946,7 +1001,15 @@ spu_elf_size_stubs (bfd *output_bfd,
if (r_type >= R_SPU_max)
{
bfd_set_error (bfd_error_bad_value);
- goto error_ret_free_internal;
+ error_ret_free_internal:
+ if (elf_section_data (isec)->relocs != internal_relocs)
+ free (internal_relocs);
+ error_ret_free_local:
+ if (local_syms != NULL
+ && (symtab_hdr->contents
+ != (unsigned char *) local_syms))
+ free (local_syms);
+ return FALSE;
}
/* Determine the reloc target section. */
@@ -970,7 +1033,7 @@ spu_elf_size_stubs (bfd *output_bfd,
{
unsigned char insn[4];
- if (!bfd_get_section_contents (ibfd, section, insn,
+ if (!bfd_get_section_contents (ibfd, isec, insn,
irela->r_offset, 4))
goto error_ret_free_internal;
@@ -1013,50 +1076,40 @@ spu_elf_size_stubs (bfd *output_bfd,
continue;
}
- if (!needs_ovl_stub (sym_name, sym_sec, section, htab,
+ if (!needs_ovl_stub (sym_name, sym_sec, isec, htab,
insn_type != non_branch))
continue;
- stub_name = spu_stub_name (sym_sec, h, irela);
- if (stub_name == NULL)
- goto error_ret_free_internal;
-
- sh = (struct spu_stub_hash_entry *)
- bfd_hash_lookup (&htab->stub_hash_table, stub_name,
- TRUE, FALSE);
- if (sh == NULL)
+ if (htab->stub_count == NULL)
{
- free (stub_name);
- error_ret_free_internal:
- if (elf_section_data (section)->relocs != internal_relocs)
- free (internal_relocs);
- error_ret_free_local:
- if (local_syms != NULL
- && (symtab_hdr->contents
- != (unsigned char *) local_syms))
- free (local_syms);
- return FALSE;
+ bfd_size_type amt;
+ amt = (htab->num_overlays + 1) * sizeof (*htab->stub_count);
+ htab->stub_count = bfd_zmalloc (amt);
+ if (htab->stub_count == NULL)
+ goto error_ret_free_internal;
}
- /* If this entry isn't new, we already have a stub. */
- if (sh->target_section != NULL)
+ if (!build)
{
- free (stub_name);
- continue;
+ if (!count_stub (htab, ibfd, isec, insn_type, h, irela))
+ goto error_ret_free_internal;
}
-
- sh->target_section = sym_sec;
- if (h != NULL)
- sh->target_off = h->root.u.def.value;
else
- sh->target_off = sym->st_value;
- sh->target_off += irela->r_addend;
-
- htab->stubs.count += 1;
+ {
+ bfd_vma dest;
+
+ if (h != NULL)
+ dest = h->root.u.def.value;
+ else
+ dest = sym->st_value;
+ if (!build_stub (htab, ibfd, isec, insn_type, h, irela,
+ dest, sym_sec))
+ goto error_ret_free_internal;
+ }
}
/* We're done with the internal relocs, free them. */
- if (elf_section_data (section)->relocs != internal_relocs)
+ if (elf_section_data (isec)->relocs != internal_relocs)
free (internal_relocs);
}
@@ -1070,99 +1123,64 @@ spu_elf_size_stubs (bfd *output_bfd,
}
}
+ return TRUE;
+}
+
+/* Allocate space for overlay call and return stubs. */
+
+int
+spu_elf_size_stubs (bfd *output_bfd,
+ struct bfd_link_info *info,
+ void (*place_spu_section) (asection *, asection *,
+ const char *),
+ int non_overlay_stubs)
+{
+ struct spu_link_hash_table *htab = spu_hash_table (info);
+ bfd *ibfd;
+ bfd_size_type amt;
+ flagword flags;
+ unsigned int i;
+ asection *stub;
+
+ htab->non_overlay_stubs = non_overlay_stubs;
+ if (!process_stubs (output_bfd, info, FALSE))
+ return 0;
+
elf_link_hash_traverse (&htab->elf, allocate_spuear_stubs, htab);
- if (htab->stubs.err)
- return FALSE;
+ if (htab->stub_err)
+ return 0;
- *stub = NULL;
- if (htab->stubs.count == 0)
- return TRUE;
+ if (htab->stub_count == NULL)
+ return 1;
ibfd = info->input_bfds;
- flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY
- | SEC_HAS_CONTENTS | SEC_IN_MEMORY);
- htab->stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
- *stub = htab->stub;
- if (htab->stub == NULL
- || !bfd_set_section_alignment (ibfd, htab->stub, 4))
- return FALSE;
+ amt = (htab->num_overlays + 1) * sizeof (*htab->stub_sec);
+ htab->stub_sec = bfd_zmalloc (amt);
+ if (htab->stub_sec == NULL)
+ return 0;
- flags = (SEC_ALLOC | SEC_LOAD
+ flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY
| SEC_HAS_CONTENTS | SEC_IN_MEMORY);
- htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags);
- *ovtab = htab->ovtab;
- if (htab->ovtab == NULL
- || !bfd_set_section_alignment (ibfd, htab->ovtab, 4))
- return FALSE;
-
- *toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC);
- if (*toe == NULL
- || !bfd_set_section_alignment (ibfd, *toe, 4))
- return FALSE;
- (*toe)->size = 16;
+ stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
+ htab->stub_sec[0] = stub;
+ if (stub == NULL
+ || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8)))
+ return 0;
+ stub->size = htab->stub_count[0] * OVL_STUB_SIZE;
+ (*place_spu_section) (stub, NULL, ".text");
- /* Retrieve all the stubs and sort. */
- htab->stubs.sh = bfd_malloc (htab->stubs.count * sizeof (*htab->stubs.sh));
- if (htab->stubs.sh == NULL)
- return FALSE;
- i = htab->stubs.count;
- bfd_hash_traverse (&htab->stub_hash_table, populate_stubs, htab);
- BFD_ASSERT (htab->stubs.count == 0);
-
- htab->stubs.count = i;
- qsort (htab->stubs.sh, htab->stubs.count, sizeof (*htab->stubs.sh),
- sort_stubs);
-
- /* Now that the stubs are sorted, place them in the stub section.
- Stubs are grouped per overlay
- . ila $79,func1
- . br 1f
- . ila $79,func2
- . br 1f
- .
- .
- . ila $79,funcn
- . nop
- . 1:
- . ila $78,ovl_index
- . br __ovly_load */
-
- group = 0;
- for (i = 0; i < htab->stubs.count; i++)
+ for (i = 0; i < htab->num_overlays; ++i)
{
- if (spu_elf_section_data (htab->stubs.sh[group]->target_section
- ->output_section)->ovl_index
- != spu_elf_section_data (htab->stubs.sh[i]->target_section
- ->output_section)->ovl_index)
- {
- htab->stub->size += SIZEOF_STUB2;
- for (; group != i; group++)
- htab->stubs.sh[group]->delta
- = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off;
- }
- if (group == i
- || ((htab->stubs.sh[i - 1]->target_section->output_section->vma
- + htab->stubs.sh[i - 1]->target_section->output_offset
- + htab->stubs.sh[i - 1]->target_off)
- != (htab->stubs.sh[i]->target_section->output_section->vma
- + htab->stubs.sh[i]->target_section->output_offset
- + htab->stubs.sh[i]->target_off)))
- {
- htab->stubs.sh[i]->off = htab->stub->size;
- htab->stub->size += SIZEOF_STUB1;
- if (info->emitrelocations)
- htab->stub->reloc_count += 1;
- }
- else
- htab->stubs.sh[i]->off = htab->stubs.sh[i - 1]->off;
+ asection *osec = htab->ovl_sec[i];
+ unsigned int ovl = spu_elf_section_data (osec)->u.o.ovl_index;
+ stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
+ htab->stub_sec[ovl] = stub;
+ if (stub == NULL
+ || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8)))
+ return 0;
+ stub->size = htab->stub_count[ovl] * OVL_STUB_SIZE;
+ (*place_spu_section) (stub, osec, NULL);
}
- if (group != i)
- htab->stub->size += SIZEOF_STUB2;
- if (info->emitrelocations)
- htab->stub->flags |= SEC_RELOC;
- for (; group != i; group++)
- htab->stubs.sh[group]->delta
- = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off;
/* htab->ovtab consists of two arrays.
. struct {
@@ -1174,12 +1192,27 @@ spu_elf_size_stubs (bfd *output_bfd,
.
. struct {
. u32 mapped;
- . } _ovly_buf_table[]; */
+ . } _ovly_buf_table[];
+ . */
- htab->ovtab->alignment_power = 4;
- htab->ovtab->size = htab->num_overlays * 16 + htab->num_buf * 4;
+ flags = (SEC_ALLOC | SEC_LOAD
+ | SEC_HAS_CONTENTS | SEC_IN_MEMORY);
+ htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags);
+ if (htab->ovtab == NULL
+ || !bfd_set_section_alignment (ibfd, htab->ovtab, 4))
+ return 0;
- return TRUE;
+ htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4;
+ (*place_spu_section) (htab->ovtab, NULL, ".data");
+
+ htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC);
+ if (htab->toe == NULL
+ || !bfd_set_section_alignment (ibfd, htab->toe, 4))
+ return 0;
+ htab->toe->size = 16;
+ (*place_spu_section) (htab->toe, NULL, ".toe");
+
+ return 2;
}
/* Functions to handle embedded spu_ovl.o object. */
@@ -1228,149 +1261,6 @@ spu_elf_open_builtin_lib (bfd **ovl_bfd, const struct _ovl_stream *stream)
return *ovl_bfd != NULL;
}
-/* Fill in the ila and br for a stub. On the last stub for a group,
- write the stub that sets the overlay number too. */
-
-static bfd_boolean
-write_one_stub (struct spu_stub_hash_entry *ent, struct bfd_link_info *info)
-{
- struct spu_link_hash_table *htab = spu_hash_table (info);
- asection *sec = htab->stub;
- asection *s = ent->target_section;
- unsigned int ovl;
- bfd_vma val;
-
- val = ent->target_off + s->output_offset + s->output_section->vma;
- bfd_put_32 (sec->owner, ILA_79 + ((val << 7) & 0x01ffff80),
- sec->contents + ent->off);
- val = ent->delta + 4;
- bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
- sec->contents + ent->off + 4);
-
- if (info->emitrelocations)
- {
- Elf_Internal_Rela *relocs, *r;
- struct bfd_elf_section_data *elfsec_data;
-
- elfsec_data = elf_section_data (sec);
- relocs = elfsec_data->relocs;
- if (relocs == NULL)
- {
- bfd_size_type relsize;
- Elf_Internal_Shdr *symtab_hdr;
- struct elf_link_hash_entry **sym_hash;
- unsigned long symcount;
- bfd_vma amt;
-
- relsize = sec->reloc_count * sizeof (*relocs);
- relocs = bfd_alloc (sec->owner, relsize);
- if (relocs == NULL)
- return FALSE;
- elfsec_data->relocs = relocs;
- elfsec_data->rel_hdr.sh_size
- = sec->reloc_count * sizeof (Elf32_External_Rela);
- elfsec_data->rel_hdr.sh_entsize = sizeof (Elf32_External_Rela);
- sec->reloc_count = 0;
-
- /* Increase the size of symbol hash array on the bfd to
- which we attached our .stub section. This hack allows
- us to create relocs against global symbols. */
- symtab_hdr = &elf_tdata (sec->owner)->symtab_hdr;
- symcount = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
- symcount -= symtab_hdr->sh_info;
- amt = symcount * sizeof (*sym_hash);
- sym_hash = bfd_alloc (sec->owner, amt + sizeof (*sym_hash));
- if (sym_hash == NULL)
- return FALSE;
- memcpy (sym_hash, elf_sym_hashes (sec->owner), amt);
- sym_hash[symcount] = htab->ovly_load;
- htab->ovly_load_r_symndx = symcount + symtab_hdr->sh_info;
- elf_sym_hashes (sec->owner) = sym_hash;
- }
- r = relocs + sec->reloc_count;
- sec->reloc_count += 1;
- r->r_offset = ent->off + 4;
- r->r_info = ELF32_R_INFO (0, R_SPU_REL16);
- r->r_addend = (sec->output_section->vma
- + sec->output_offset
- + ent->off + 4
- + val);
- }
-
- /* If this is the last stub of this group, write stub2. */
- if (ent->delta == 0)
- {
- bfd_put_32 (sec->owner, NOP,
- sec->contents + ent->off + 4);
-
- ovl = spu_elf_section_data (s->output_section)->ovl_index;
- bfd_put_32 (sec->owner, ILA_78 + ((ovl << 7) & 0x01ffff80),
- sec->contents + ent->off + 8);
-
- val = (htab->ovly_load->root.u.def.section->output_section->vma
- + htab->ovly_load->root.u.def.section->output_offset
- + htab->ovly_load->root.u.def.value
- - (sec->output_section->vma
- + sec->output_offset
- + ent->off + 12));
-
- if (val + 0x20000 >= 0x40000)
- htab->stub_overflow = TRUE;
-
- bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
- sec->contents + ent->off + 12);
-
- if (info->emitrelocations)
- {
- Elf_Internal_Rela *relocs, *r;
- struct bfd_elf_section_data *elfsec_data;
-
- elfsec_data = elf_section_data (sec);
- relocs = elfsec_data->relocs;
- /* The last branch is overwritten, so overwrite its reloc too. */
- r = relocs + sec->reloc_count - 1;
- r->r_offset = ent->off + 12;
- r->r_info = ELF32_R_INFO (htab->ovly_load_r_symndx, R_SPU_REL16);
- r->r_addend = 0;
- }
- }
-
- if (htab->emit_stub_syms)
- {
- struct elf_link_hash_entry *h;
- size_t len1, len2;
- char *name;
-
- len1 = sizeof ("00000000.ovl_call.") - 1;
- len2 = strlen (ent->root.string);
- name = bfd_malloc (len1 + len2 + 1);
- if (name == NULL)
- return FALSE;
- memcpy (name, "00000000.ovl_call.", len1);
- memcpy (name + len1, ent->root.string, len2 + 1);
- h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
- free (name);
- if (h == NULL)
- return FALSE;
- if (h->root.type == bfd_link_hash_new)
- {
- h->root.type = bfd_link_hash_defined;
- h->root.u.def.section = sec;
- h->root.u.def.value = ent->off;
- h->size = (ent->delta == 0
- ? SIZEOF_STUB1 + SIZEOF_STUB2 : SIZEOF_STUB1);
- h->type = STT_FUNC;
- h->ref_regular = 1;
- h->def_regular = 1;
- h->ref_regular_nonweak = 1;
- h->forced_local = 1;
- h->non_elf = 0;
- }
- }
-
- return TRUE;
-}
-
/* Define an STT_OBJECT symbol. */
static struct elf_link_hash_entry *
@@ -1408,7 +1298,7 @@ define_ovtab_symbol (struct spu_link_hash_table *htab, const char *name)
/* Fill in all stubs and the overlay tables. */
bfd_boolean
-spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
+spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms)
{
struct spu_link_hash_table *htab = spu_hash_table (info);
struct elf_link_hash_entry *h;
@@ -1418,9 +1308,19 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
unsigned int i;
htab->emit_stub_syms = emit_syms;
- htab->stub->contents = bfd_zalloc (htab->stub->owner, htab->stub->size);
- if (htab->stub->contents == NULL)
- return FALSE;
+ if (htab->stub_count == NULL)
+ return TRUE;
+
+ for (i = 0; i <= htab->num_overlays; i++)
+ if (htab->stub_sec[i]->size != 0)
+ {
+ htab->stub_sec[i]->contents = bfd_zalloc (htab->stub_sec[i]->owner,
+ htab->stub_sec[i]->size);
+ if (htab->stub_sec[i]->contents == NULL)
+ return FALSE;
+ htab->stub_sec[i]->rawsize = htab->stub_sec[i]->size;
+ htab->stub_sec[i]->size = 0;
+ }
h = elf_link_hash_lookup (&htab->elf, "__ovly_load", FALSE, FALSE, FALSE);
htab->ovly_load = h;
@@ -1430,7 +1330,7 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
&& h->def_regular);
s = h->root.u.def.section->output_section;
- if (spu_elf_section_data (s)->ovl_index)
+ if (spu_elf_section_data (s)->u.o.ovl_index)
{
(*_bfd_error_handler) (_("%s in overlay section"),
h->root.u.def.section->owner);
@@ -1438,11 +1338,29 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
return FALSE;
}
+ h = elf_link_hash_lookup (&htab->elf, "__ovly_return", FALSE, FALSE, FALSE);
+ htab->ovly_return = h;
+
/* Write out all the stubs. */
- for (i = 0; i < htab->stubs.count; i++)
- write_one_stub (htab->stubs.sh[i], info);
+ obfd = htab->ovtab->output_section->owner;
+ process_stubs (obfd, info, TRUE);
+
+ elf_link_hash_traverse (&htab->elf, build_spuear_stubs, htab);
+ if (htab->stub_err)
+ return FALSE;
- if (htab->stub_overflow)
+ for (i = 0; i <= htab->num_overlays; i++)
+ {
+ if (htab->stub_sec[i]->size != htab->stub_sec[i]->rawsize)
+ {
+ (*_bfd_error_handler) (_("stubs don't match calculated size"));
+ bfd_set_error (bfd_error_bad_value);
+ return FALSE;
+ }
+ htab->stub_sec[i]->rawsize = 0;
+ }
+
+ if (htab->stub_err)
{
(*_bfd_error_handler) (_("overlay stub relocation overflow"));
bfd_set_error (bfd_error_bad_value);
@@ -1455,75 +1373,52 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
/* Write out _ovly_table. */
p = htab->ovtab->contents;
- obfd = htab->ovtab->output_section->owner;
+ /* Set low bit of .buf to mark non-overlay area as present. */
+ p[15] = 1;
for (s = obfd->sections; s != NULL; s = s->next)
{
- unsigned int ovl_index = spu_elf_section_data (s)->ovl_index;
+ unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index;
if (ovl_index != 0)
{
- unsigned int lo, hi, mid;
- unsigned long off = (ovl_index - 1) * 16;
+ unsigned long off = ovl_index * 16;
+ unsigned int ovl_buf = spu_elf_section_data (s)->u.o.ovl_buf;
+
bfd_put_32 (htab->ovtab->owner, s->vma, p + off);
bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4);
/* file_off written later in spu_elf_modify_program_headers. */
-
- lo = 0;
- hi = htab->num_buf;
- while (lo < hi)
- {
- mid = (lo + hi) >> 1;
- if (htab->ovl_region[2 * mid + 1]->vma
- + htab->ovl_region[2 * mid + 1]->size <= s->vma)
- lo = mid + 1;
- else if (htab->ovl_region[2 * mid]->vma > s->vma)
- hi = mid;
- else
- {
- bfd_put_32 (htab->ovtab->owner, mid + 1, p + off + 12);
- break;
- }
- }
- BFD_ASSERT (lo < hi);
+ bfd_put_32 (htab->ovtab->owner, ovl_buf * 2, p + off + 12);
}
}
- /* Write out _ovly_buf_table. */
- p = htab->ovtab->contents + htab->num_overlays * 16;
- for (i = 0; i < htab->num_buf; i++)
- {
- bfd_put_32 (htab->ovtab->owner, 0, p);
- p += 4;
- }
-
h = define_ovtab_symbol (htab, "_ovly_table");
if (h == NULL)
return FALSE;
- h->root.u.def.value = 0;
+ h->root.u.def.value = 16;
h->size = htab->num_overlays * 16;
h = define_ovtab_symbol (htab, "_ovly_table_end");
if (h == NULL)
return FALSE;
- h->root.u.def.value = htab->num_overlays * 16;
+ h->root.u.def.value = htab->num_overlays * 16 + 16;
h->size = 0;
h = define_ovtab_symbol (htab, "_ovly_buf_table");
if (h == NULL)
return FALSE;
- h->root.u.def.value = htab->num_overlays * 16;
- h->size = htab->num_buf * 4;
+ h->root.u.def.value = htab->num_overlays * 16 + 16;
+ h->size = htab->num_buf * 2 * 4;
h = define_ovtab_symbol (htab, "_ovly_buf_table_end");
if (h == NULL)
return FALSE;
- h->root.u.def.value = htab->num_overlays * 16 + htab->num_buf * 4;
+ h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4;
h->size = 0;
h = define_ovtab_symbol (htab, "_EAR_");
if (h == NULL)
return FALSE;
- h->root.u.def.section = toe;
+ h->root.u.def.section = htab->toe;
h->root.u.def.value = 0;
h->size = 16;
@@ -1716,10 +1611,10 @@ alloc_stack_info (asection *sec, int max_fun)
amt = sizeof (struct spu_elf_stack_info);
amt += (max_fun - 1) * sizeof (struct function_info);
- sec_data->stack_info = bfd_zmalloc (amt);
- if (sec_data->stack_info != NULL)
- sec_data->stack_info->max_fun = max_fun;
- return sec_data->stack_info;
+ sec_data->u.i.stack_info = bfd_zmalloc (amt);
+ if (sec_data->u.i.stack_info != NULL)
+ sec_data->u.i.stack_info->max_fun = max_fun;
+ return sec_data->u.i.stack_info;
}
/* Add a new struct function_info describing a (part of a) function
@@ -1732,7 +1627,7 @@ maybe_insert_function (asection *sec,
bfd_boolean is_func)
{
struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
- struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+ struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
int i;
bfd_vma off, size;
@@ -1796,7 +1691,7 @@ maybe_insert_function (asection *sec,
if (sinfo == NULL)
return NULL;
memset ((char *) sinfo + old, 0, amt - old);
- sec_data->stack_info = sinfo;
+ sec_data->u.i.stack_info = sinfo;
}
sinfo->fun[i].is_func = is_func;
sinfo->fun[i].global = global;
@@ -1887,7 +1782,7 @@ static bfd_boolean
check_function_ranges (asection *sec, struct bfd_link_info *info)
{
struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
- struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+ struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
int i;
bfd_boolean gaps = FALSE;
@@ -1933,7 +1828,7 @@ static struct function_info *
find_function (asection *sec, bfd_vma offset, struct bfd_link_info *info)
{
struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
- struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+ struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
int lo, hi, mid;
lo = 0;
@@ -2155,7 +2050,7 @@ pasted_function (asection *sec, struct bfd_link_info *info)
}
if (l->type == bfd_indirect_link_order
&& (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL
- && (sinfo = sec_data->stack_info) != NULL
+ && (sinfo = sec_data->u.i.stack_info) != NULL
&& sinfo->num_fun != 0)
fun_start = &sinfo->fun[sinfo->num_fun - 1];
}
@@ -2164,15 +2059,15 @@ pasted_function (asection *sec, struct bfd_link_info *info)
return FALSE;
}
-/* We're only interested in code sections. */
+/* We're only interested in code sections. Testing SEC_IN_MEMORY excludes
+ overlay stub sections. */
static bfd_boolean
-interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab)
+interesting_section (asection *s, bfd *obfd)
{
- return (s != htab->stub
- && s->output_section != NULL
+ return (s->output_section != NULL
&& s->output_section->owner == obfd
- && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+ && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_IN_MEMORY))
== (SEC_ALLOC | SEC_LOAD | SEC_CODE))
&& s->size != 0);
}
@@ -2182,7 +2077,6 @@ interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab)
static bfd_boolean
discover_functions (bfd *output_bfd, struct bfd_link_info *info)
{
- struct spu_link_hash_table *htab = spu_hash_table (info);
bfd *ibfd;
int bfd_idx;
Elf_Internal_Sym ***psym_arr;
@@ -2247,7 +2141,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info)
asection *s;
*p = s = bfd_section_from_elf_index (ibfd, sy->st_shndx);
- if (s != NULL && interesting_section (s, output_bfd, htab))
+ if (s != NULL && interesting_section (s, output_bfd))
*psy++ = sy;
}
symcount = psy - psyms;
@@ -2289,7 +2183,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info)
}
for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
- if (interesting_section (sec, output_bfd, htab))
+ if (interesting_section (sec, output_bfd))
gaps |= check_function_ranges (sec, info);
}
@@ -2307,7 +2201,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info)
continue;
for (sec = ibfd->sections; sec != NULL; sec = sec->next)
- if (interesting_section (sec, output_bfd, htab)
+ if (interesting_section (sec, output_bfd)
&& sec->reloc_count != 0)
{
if (!mark_functions_via_relocs (sec, info, FALSE))
@@ -2334,7 +2228,7 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info)
gaps = FALSE;
for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
- if (interesting_section (sec, output_bfd, htab))
+ if (interesting_section (sec, output_bfd))
gaps |= check_function_ranges (sec, info);
if (!gaps)
continue;
@@ -2360,13 +2254,13 @@ discover_functions (bfd *output_bfd, struct bfd_link_info *info)
the range of such functions to the beginning of the
next symbol of interest. */
for (sec = ibfd->sections; sec != NULL; sec = sec->next)
- if (interesting_section (sec, output_bfd, htab))
+ if (interesting_section (sec, output_bfd))
{
struct _spu_elf_section_data *sec_data;
struct spu_elf_stack_info *sinfo;
sec_data = spu_elf_section_data (sec);
- sinfo = sec_data->stack_info;
+ sinfo = sec_data->u.i.stack_info;
if (sinfo != NULL)
{
int fun_idx;
@@ -2455,7 +2349,6 @@ call_graph_traverse (struct function_info *fun, struct bfd_link_info *info)
static bfd_boolean
build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
{
- struct spu_link_hash_table *htab = spu_hash_table (info);
bfd *ibfd;
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
@@ -2468,7 +2361,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
for (sec = ibfd->sections; sec != NULL; sec = sec->next)
{
- if (!interesting_section (sec, output_bfd, htab)
+ if (!interesting_section (sec, output_bfd)
|| sec->reloc_count == 0)
continue;
@@ -2484,7 +2377,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
struct spu_elf_stack_info *sinfo;
if ((sec_data = spu_elf_section_data (sec)) != NULL
- && (sinfo = sec_data->stack_info) != NULL)
+ && (sinfo = sec_data->u.i.stack_info) != NULL)
{
int i;
for (i = 0; i < sinfo->num_fun; ++i)
@@ -2523,7 +2416,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
struct spu_elf_stack_info *sinfo;
if ((sec_data = spu_elf_section_data (sec)) != NULL
- && (sinfo = sec_data->stack_info) != NULL)
+ && (sinfo = sec_data->u.i.stack_info) != NULL)
{
int i;
for (i = 0; i < sinfo->num_fun; ++i)
@@ -2549,7 +2442,7 @@ build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
struct spu_elf_stack_info *sinfo;
if ((sec_data = spu_elf_section_data (sec)) != NULL
- && (sinfo = sec_data->stack_info) != NULL)
+ && (sinfo = sec_data->u.i.stack_info) != NULL)
{
int i;
for (i = 0; i < sinfo->num_fun; ++i)
@@ -2684,7 +2577,7 @@ spu_elf_stack_analysis (bfd *output_bfd,
struct spu_elf_stack_info *sinfo;
if ((sec_data = spu_elf_section_data (sec)) != NULL
- && (sinfo = sec_data->stack_info) != NULL)
+ && (sinfo = sec_data->u.i.stack_info) != NULL)
{
int i;
for (i = 0; i < sinfo->num_fun; ++i)
@@ -2847,25 +2740,31 @@ spu_elf_relocate_section (bfd *output_bfd,
addend = rel->r_addend;
branch = (is_branch (contents + rel->r_offset)
|| is_hint (contents + rel->r_offset));
- if (needs_ovl_stub (sym_name, sec, input_section, htab, branch))
+ if (htab->stub_sec != NULL
+ && needs_ovl_stub (sym_name, sec, input_section, htab, branch)
+ && (h == NULL
+ || (h != htab->ovly_load && h != htab->ovly_return)))
{
- char *stub_name;
- struct spu_stub_hash_entry *sh;
+ unsigned int ovl = 0;
+ struct got_entry *g, **head;
- stub_name = spu_stub_name (sec, h, rel);
- if (stub_name == NULL)
- return FALSE;
+ if (branch)
+ ovl = (spu_elf_section_data (input_section->output_section)
+ ->u.o.ovl_index);
- sh = (struct spu_stub_hash_entry *)
- bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE);
- if (sh != NULL)
- {
- relocation = (htab->stub->output_section->vma
- + htab->stub->output_offset
- + sh->off);
- addend = 0;
- }
- free (stub_name);
+ if (h != NULL)
+ head = &h->got.glist;
+ else
+ head = elf_local_got_ents (input_bfd) + r_symndx;
+
+ for (g = *head; g != NULL; g = g->next)
+ if (g->ovl == ovl || g->ovl == 0)
+ break;
+ if (g == NULL)
+ abort ();
+
+ relocation = g->stub_addr;
+ addend = 0;
}
r = _bfd_final_link_relocate (howto,
@@ -2960,30 +2859,22 @@ spu_elf_output_symbol_hook (struct bfd_link_info *info,
struct spu_link_hash_table *htab = spu_hash_table (info);
if (!info->relocatable
- && htab->num_overlays != 0
+ && htab->stub_sec != NULL
&& h != NULL
&& (h->root.type == bfd_link_hash_defined
|| h->root.type == bfd_link_hash_defweak)
&& h->def_regular
&& strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
{
- static Elf_Internal_Rela zero_rel;
- char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel);
- struct spu_stub_hash_entry *sh;
+ struct got_entry *g = h->got.glist;
- if (stub_name == NULL)
- return FALSE;
- sh = (struct spu_stub_hash_entry *)
- bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE);
- free (stub_name);
- if (sh == NULL)
- return TRUE;
- sym->st_shndx
- = _bfd_elf_section_from_bfd_section (htab->stub->output_section->owner,
- htab->stub->output_section);
- sym->st_value = (htab->stub->output_section->vma
- + htab->stub->output_offset
- + sh->off);
+ if (g != NULL && g->ovl == 0)
+ {
+ sym->st_shndx = (_bfd_elf_section_from_bfd_section
+ (htab->stub_sec[0]->output_section->owner,
+ htab->stub_sec[0]->output_section));
+ sym->st_value = g->stub_addr;
+ }
}
return TRUE;
@@ -3049,7 +2940,7 @@ spu_elf_modify_segment_map (bfd *abfd, struct bfd_link_info *info)
if (m->p_type == PT_LOAD && m->count > 1)
for (i = 0; i < m->count; i++)
if ((s = m->sections[i]) == toe
- || spu_elf_section_data (s)->ovl_index != 0)
+ || spu_elf_section_data (s)->u.o.ovl_index != 0)
{
struct elf_segment_map *m2;
bfd_vma amt;
@@ -3148,7 +3039,7 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info)
for (i = 0, m = elf_tdata (abfd)->segment_map; m; ++i, m = m->next)
if (m->count != 0
- && (o = spu_elf_section_data (m->sections[0])->ovl_index) != 0)
+ && (o = spu_elf_section_data (m->sections[0])->u.o.ovl_index) != 0)
{
/* Mark this as an overlay header. */
phdr[i].p_flags |= PF_OVERLAY;
@@ -3156,7 +3047,7 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info)
if (htab->ovtab != NULL && htab->ovtab->size != 0)
{
bfd_byte *p = htab->ovtab->contents;
- unsigned int off = (o - 1) * 16 + 8;
+ unsigned int off = o * 16 + 8;
/* Write file_off into _ovly_table. */
bfd_put_32 (htab->ovtab->owner, phdr[i].p_offset, p + off);
@@ -3226,7 +3117,6 @@ spu_elf_modify_program_headers (bfd *abfd, struct bfd_link_info *info)
#define elf_backend_link_output_symbol_hook spu_elf_output_symbol_hook
#define bfd_elf32_new_section_hook spu_elf_new_section_hook
#define bfd_elf32_bfd_link_hash_table_create spu_elf_link_hash_table_create
-#define bfd_elf32_bfd_link_hash_table_free spu_elf_link_hash_table_free
#define elf_backend_additional_program_headers spu_elf_additional_program_headers
#define elf_backend_modify_segment_map spu_elf_modify_segment_map
diff --git a/bfd/elf32-spu.h b/bfd/elf32-spu.h
index 1c421d9..4478e20 100644
--- a/bfd/elf32-spu.h
+++ b/bfd/elf32-spu.h
@@ -26,12 +26,20 @@ struct _spu_elf_section_data
{
struct bfd_elf_section_data elf;
- /* Stack analysis info kept for this section. */
+ union {
+ /* Info kept for input sections. */
+ struct {
+ /* Stack analysis info kept for this section. */
+ struct spu_elf_stack_info *stack_info;
+ } i;
- struct spu_elf_stack_info *stack_info;
-
- /* Non-zero for overlay output sections. */
- unsigned int ovl_index;
+ /* Info kept for output sections. */
+ struct {
+ /* Non-zero for overlay output sections. */
+ unsigned int ovl_index;
+ unsigned int ovl_buf;
+ } o;
+ } u;
};
#define spu_elf_section_data(sec) \
@@ -49,9 +57,8 @@ extern bfd_boolean spu_elf_open_builtin_lib (bfd **,
extern bfd_boolean spu_elf_create_sections (bfd *,
struct bfd_link_info *, int, int);
extern bfd_boolean spu_elf_find_overlays (bfd *, struct bfd_link_info *);
-extern bfd_boolean spu_elf_size_stubs (bfd *, struct bfd_link_info *, int, int,
- asection **, asection **,
- asection **);
-extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int,
- asection *);
+extern int spu_elf_size_stubs (bfd *, struct bfd_link_info *,
+ void (*) (asection *, asection *, const char *),
+ int);
+extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int);
extern asection *spu_elf_check_vma (bfd *, bfd_vma, bfd_vma);
diff --git a/ld/ChangeLog b/ld/ChangeLog
index 071e5b9..2a72f89 100644
--- a/ld/ChangeLog
+++ b/ld/ChangeLog
@@ -1,3 +1,15 @@
+2008-01-28 Alan Modra <amodra@bigpond.net.au>
+
+ * emultempl/spu_ovl.S: Rewrite.
+ * emultempl/spu_ovl.o: Regenerate.
+ * emultempl/spuelf.em (toe): Delete.
+ (spu_place_special_section): Add param to control section placement.
+ Adjust callers.
+ (spu_elf_load_ovl_mgr): Adjust for struct _spu_elf_section_data
+ changes.
+ (spu_before_allocation): Adjust spu_elf_size_stubs call.
+ (gld${EMULATION_NAME}_finish): Adjust spu_elf_build_stubs call.
+
2008-01-25 H.J. Lu <hongjiu.lu@intel.com>
PR ld/5670
diff --git a/ld/emultempl/spu_ovl.S b/ld/emultempl/spu_ovl.S
index 66dd69b..3f9c83b 100644
--- a/ld/emultempl/spu_ovl.S
+++ b/ld/emultempl/spu_ovl.S
@@ -19,295 +19,242 @@
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
-/**
- * MFC DMA defn's.
- */
+/* MFC DMA defn's. */
#define MFC_GET_CMD 0x40
#define MFC_MAX_DMA_SIZE 0x4000
#define MFC_TAG_UPDATE_ALL 2
#define MFC_TAG_ID 0
+/* Register usage. */
+#define reserved1 $75
+#define parm $75
+#define tab1 reserved1
+#define tab2 reserved1
+#define vma reserved1
+#define oldvma reserved1
+#define newmask reserved1
+#define map reserved1
+
+#define reserved2 $76
+#define off1 reserved2
+#define off2 reserved2
+#define present1 reserved2
+#define present2 reserved2
+#define sz reserved2
+#define cmp reserved2
+#define add64 reserved2
+#define cgbits reserved2
+#define off3 reserved2
+#define off4 reserved2
+#define off5 reserved2
+#define tagstat reserved2
+
+#define reserved3 $77
+#define buf1 reserved3
+#define buf2 reserved3
+#define rv3 reserved3
+#define ealo reserved3
+#define cmd reserved3
+#define off64 reserved3
+#define tab3 reserved3
+#define tab4 reserved3
+#define tab5 reserved3
+
+#define reserved4 $78
+#define ovl reserved4
+#define rv2 reserved4
+#define rv5 reserved4
+#define cgshuf reserved4
+#define newovl reserved4
+
+#define reserved5 $79
+#define target reserved5
+
+#define save1 $72
+#define rv4 save1
+#define rv7 save1
+#define tagid save1
+#define maxsize save1
+#define pbyte save1
+#define pbit save1
+
+#define save2 $73
+#define cur save2
+#define rv6 save2
+#define osize save2
+#define zovl save2
+#define oldovl save2
+#define newvma save2
+
+#define save3 $74
+#define rv1 save3
+#define ea64 save3
+#define buf3 save3
+#define genwi save3
+#define newmap save3
+#define oldmask save3
-/**
- * Temporary register allocations.
- * These are saved/restored here.
- */
-#define tab $75
-#define cgbits $75
-#define add64 $75
-#define ealo $75
-#define newmask $75
-#define tagstat $75
-#define bchn $75
-#define rv1 $75
-
-#define off $76
-#define off64 $76
-#define maxsize $76
-#define oldmask $76
-#define sz $76
-#define lnkr $76
-#define rv2 $76
-
-#define cur $77
-#define cmp $77
-#define buf $77
-#define genwi $77
-#define tagid $77
-#define cmd $77
-#define rv3 $77
-
-#define cgshuf $78
-
-#define vma $6
-
-#define map $7
-#define osize $7
-#define cmp2 $7
-
-#define ea64 $8
-#define retval $8
-
-#ifdef OVLY_IRQ_SAVE
-#define irqtmp $8
-#define irq_stat $9
-#endif
-
-# Stack quadword minux N
-#define SQWM1 -16*1
-#define SQWM2 -16*2
-#define SQWM3 -16*3
-#define SQWM4 -16*4
-#define SQWM5 -16*5
-#define SQWM6 -16*6
-#define SQWM7 -16*7
-#define SQWM8 -16*8
-#define SQWM9 -16*9
-#define SQWM10 -16*10
-#define SQWM11 -16*11
-#define SQWM12 -16*12
-#define SQWM13 -16*13
-#define SQWM14 -16*14
-#define SQWM15 -16*15
-#define SQWM16 -16*16
-
- .extern _ovly_table
- .extern _ovly_buf_table
-
-#ifdef OVLY_PRINTFS
-#define SPE_C99_VPRINTF 37
-__entry_event_format:
- .string "In entry_event_hook segment=0x%08x entry-address=0x%08x\n"
-__debug_event_format:
- .string "In debug_event_hook link-register=0x%08x %08x %08x %08x\n"
-__dma_event_format:
- .string "In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n"
-__ovly_buf_table_format:
- .string "_ovly_buf_table[%08x]=%08x\n"
-#endif
.text
- .align 4
- .type __rv_pattern, @object
- .size __rv_pattern, 16
+ .align 4
+ .type __rv_pattern, @object
+ .size __rv_pattern, 16
__rv_pattern:
- .word 0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
- .type __cg_pattern, @object
- .size __cg_pattern, 16
+ .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
+
+ .type __cg_pattern, @object
+ .size __cg_pattern, 16
__cg_pattern:
- .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
+ .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
+
+ .type __ovly_current, @object
+ .size __ovly_current, 16
+__ovly_current:
+ .space 16
-/**
+/*
* __ovly_return - stub for returning from overlay functions.
*
- * inputs:
- * $lr link register
+ * On entry the four slots of $lr are:
+ * __ovly_return, prev ovl index, caller return addr, undefined.
*
- * outputs:
- * $78 old partition number, to be reloaded
- * $79 return address in old partion number
+ * Load the previous overlay and jump to the caller return address.
+ * Updates __ovly_current.
*/
- .global __ovly_return
- .type __ovly_return, @function
-
- .word 0
+ .align 4
+ .global __ovly_return
+ .type __ovly_return, @function
__ovly_return:
- shlqbyi $78, $lr, 4
- shlqbyi $79, $lr, 8
- biz $78, $79
- .size __ovly_return, . - __ovly_return
-
-/**
+ ila tab1, _ovly_table - 16 # 0,2 0
+ shlqbyi ovl, $lr, 4 # 1,4 0
+#nop
+ shlqbyi target, $lr, 8 # 1,4 1
+#nop; lnop
+#nop; lnop
+ shli off1, ovl, 4 # 0,4 4
+#lnop
+#nop
+ hbr ovly_ret9, target # 1,15 5
+#nop; lnop
+#nop; lnop
+#nop
+ lqx vma, tab1, off1 # 1,6 8
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+ rotqbyi buf1, vma, 12 # 1,4 14
+#nop
+ stqd save3, -48($sp) # 1,6 15
+#nop
+ stqd save2, -32($sp) # 1,6 16
+#nop
+ stqd save1, -16($sp) # 1,6 17
+ andi present1, buf1, 1 # 0,2 18
+ stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18
+#nop; lnop
+#nop
+ brz present1, __ovly_load_event # 1,4 20
+ovly_ret9:
+#nop
+ bi target # 1,4 21
+
+/*
* __ovly_load - copy an overlay partition to local store.
*
- * inputs:
- * $78 partition number to be loaded.
- * $79 branch target in new partition.
- * $lr link register, containing return addr.
+ * On entry $75 points to a word consisting of the overlay index in
+ * the top 14 bits, and the target address in the bottom 18 bits.
*
- * outputs:
- * $lr new link register, returning through __ovly_return.
- *
- * Copy a new overlay partition into local store, or return
- * immediately if the partition is already resident.
+ * Sets up $lr to return via __ovly_return.
+ * Updates __ovly_current.
*/
- .global __ovly_load
- .type __ovly_load, @function
-
+ .align 3
+ .global __ovly_load
+ .type __ovly_load, @function
__ovly_load:
-/* Save temporary registers to stack. */
- stqd $6, -16($sp)
- stqd $7, -32($sp)
- stqd $8, -48($sp)
-
-#ifdef OVLY_IRQ_SAVE
-/* Save irq state, then disable interrupts. */
- stqd $9, -64($sp)
- ila irqtmp, __ovly_irq_save
- rdch irq_stat, $SPU_RdMachStat
- bid irqtmp
-__ovly_irq_save:
-#endif
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In entry_event_hook segment=0x%08x entry-address=0x%08x
-//==============================================
-# save registers
- stqd $10, SQWM5($sp)
- stqd $11, SQWM6($sp)
- stqd $12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
- ila $10, __entry_event_format
- stqd $10, SQWM12($sp)
- ai $10, $sp, SQWM9
- stqd $10, SQWM11($sp)
- stqd $sp, SQWM10($sp)
- stqd $78, SQWM9($sp)
- stqd $79, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
- ila $10, 0x3ffff # address mask
- ilhu $11, SPE_C99_VPRINTF << 8
- ai $12, $sp, SQWM12 # parameter pointer
- selb $11, $11, $12, $10 # combine command & address ptr
- brsl $10, next1a
-next1a:
- .type next1a, @function
- lqr $12, message1a
- cwd $10, message1a-next1a($10)
- shufb $11, $11, $12, $10 # insert msg into inst word
- stqr $11, message1a # store cmd/ptr into msg word
- dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
- stop 0x2100
-message1a:
- .word 0
-
-# save registers
- stqd $13, SQWM8($sp)
- stqd $14, SQWM9($sp)
- stqd $15, SQWM10($sp)
- stqd $16, SQWM11($sp)
-
-# initialize loop
- il $13, 1
- ila $14, _ovly_buf_table
- ila $15, _ovly_buf_table_end
-
-loop_start1:
-# Place input parameters onto the stack to form the
-# local storage memory image.
- ila $10, __ovly_buf_table_format
- stqd $10, SQWM16($sp)
- ai $10, $sp, SQWM13
- stqd $10, SQWM15($sp)
- stqd $sp, SQWM14($sp)
- stqd $13, SQWM13($sp)
- lqd $16, 0($14)
- rotqby $16, $16, $14
- stqd $16, SQWM12($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
- ila $10, 0x3ffff # address mask
- ilhu $11, SPE_C99_VPRINTF << 8
- ai $12, $sp, SQWM16 # parameter pointer
- selb $11, $11, $12, $10 # combine command & address ptr
- brsl $10, next1b
-next1b:
- .type next1b, @function
- lqr $12, message1b
- cwd $10, message1b-next1b($10)
- shufb $11, $11, $12, $10 # insert msg into inst word
- stqr $11, message1b # store cmd/ptr into msg word
- dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
- stop 0x2100
-message1b:
- .word 0
-
-# move to next entry
- ai $13, $13, 1
- ai $14, $14, 4
- clgt $16, $15, $14
- brnz $16, loop_start1
-
-# restore registers
- lqd $16, SQWM11($sp)
- lqd $15, SQWM10($sp)
- lqd $14, SQWM9($sp)
- lqd $13, SQWM8($sp)
- lqd $12, SQWM7($sp)
- lqd $11, SQWM6($sp)
- lqd $10, SQWM5($sp)
-//==============================================
+#if OVL_STUB_SIZE == 8
+########
+#nop
+ lqd target, 0(parm) # 1,6 -11
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+ rotqby target, target, parm # 1,4 -5
+ ila tab2, _ovly_table - 16 # 0,2 -4
+ stqd save3, -48($sp) # 1,6 -4
+#nop
+ stqd save2, -32($sp) # 1,6 -3
+#nop
+ stqd save1, -16($sp) # 1,6 -2
+ rotmi ovl, target, -18 # 0,4 -1
+ hbr ovly_load9, target # 1,15 -1
+ ila rv1, __ovly_return # 0,2 0
+#lnop
+#nop; lnop
+#nop
+ lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2
+ shli off2, ovl, 4 # 0,4 3
+ stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3
+ ceq rv2, $lr, rv1 # 0,2 4
+ lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4
+#nop; lnop
+#nop; lnop
+#nop
+ lqx vma, tab2, off2 # 1,6 7
+########
+#else /* OVL_STUB_SIZE == 16 */
+########
+ ila tab2, _ovly_table - 16 # 0,2 0
+ stqd save3, -48($sp) # 1,6 0
+ ila rv1, __ovly_return # 0,2 1
+ stqd save2, -32($sp) # 1,6 1
+ shli off2, ovl, 4 # 0,4 2
+ lqa cur, __ovly_current # 1,6 2
+ nop
+ stqa ovl, __ovly_current # 1,6 3
+ ceq rv2, $lr, rv1 # 0,2 4
+ lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4
+#nop
+ hbr ovly_load9, target # 1,15 5
+#nop
+ lqx vma, tab2, off2 # 1,6 6
+#nop
+ stqd save1, -16($sp) # 1,6 7
+########
#endif
-/* Set branch hint to overlay target. */
- hbr __ovly_load_ret, $79
-
-/* Get caller's overlay index by back chaining through stack frames.
- * Loop until end of stack (back chain all-zeros) or
- * encountered a link register we set here. */
- lqd bchn, 0($sp)
- ila retval, __ovly_return
-
-__ovly_backchain_loop:
- lqd lnkr, 16(bchn)
- lqd bchn, 0(bchn)
- ceq cmp, lnkr, retval
- ceqi cmp2, bchn, 0
- or cmp, cmp, cmp2
- brz cmp, __ovly_backchain_loop
-
-/* If we reached the zero back-chain, then lnkr is bogus. Clear the
- * part of lnkr that we use later (slot 3). */
- rotqbyi cmp2, cmp2, 4
- andc lnkr, lnkr, cmp2
-
-/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */
- lqd rv1, (__rv_pattern-__ovly_return+4)(retval)
- shufb rv2, retval, lnkr, rv1
- shufb rv3, $lr, $78, rv1
- fsmbi rv1, 0xff
- selb rv2, rv2, rv3, rv1
-/* If we have a tail call from one overlay function to another overlay,
- then lr is already set up. Don't change it. */
- ceq rv1, $lr, retval
- fsmb rv1, rv1
- selb $lr, rv2, $lr, rv1
-
-/* Branch to $79 if non-overlay */
- brz $78, __ovly_load_restore
-
-/* Load values from _ovly_table[$78].
+#nop; lnop
+#nop; lnop
+#nop
+ shufb rv4, rv1, cur, rv3 # 1,4 10
+#nop
+ fsmb rv5, rv2 # 1,4 11
+#nop
+ rotqmbyi rv6, $lr, -8 # 1,4 12
+#nop
+ rotqbyi buf2, vma, 12 # 1,4 13
+#nop
+ lqd save3, -48($sp) # 1,6 14
+#nop; lnop
+ or rv7, rv4, rv6 # 0,2 16
+ lqd save2, -32($sp) # 1,6 16
+ andi present2, buf2, 1 # 0,2 17
+ lnop # 1,0 17
+ selb $lr, rv7, $lr, rv5 # 0,2 18
+ lqd save1, -16($sp) # 1,6 18
+#nop
+ brz present2, __ovly_load_event # 1,4 19
+ovly_load9:
+#nop
+ bi target # 1,4 20
+
+/* If we get here, we are about to load a new overlay.
+ * "vma" contains the relevant entry from _ovly_table[].
* extern struct {
* u32 vma;
* u32 size;
@@ -315,265 +262,166 @@ __ovly_backchain_loop:
* u32 buf;
* } _ovly_table[];
*/
- shli off, $78, 4
- ila tab, _ovly_table - 16
- lqx vma, tab, off
- rotqbyi buf, vma, 12
-
-/* Load values from _ovly_buf_table[buf].
- * extern struct {
- * u32 mapped;
- * } _ovly_buf_table[];
- */
- ila tab, _ovly_buf_table
- ai off, buf, -1
- shli off, off, 2
- lqx map, tab, off
- rotqby cur, map, off
-
-/* Branch to $79 now if overlay is already mapped. */
- ceq cmp, $78, cur
- brnz cmp, __ovly_load_restore
-
-/* Marker for profiling code. If we get here, we are about to load
- * a new overlay.
- */
- .global __ovly_load_event
- .type __ovly_load_event, @function
+ .align 3
+ .global __ovly_load_event
+ .type __ovly_load_event, @function
__ovly_load_event:
-
-/* Set _ovly_buf_table[buf].mapped = $78. */
- cwx genwi, tab, off
- shufb map, $78, map, genwi
- stqx map, tab, off
-
-/* A new partition needs to be loaded. Prepare for DMA loop.
- * _EAR_ is the 64b base EA, filled in at run time by the
- * loader, and indicating the value for SPU executable image start.
- */
- lqd cgshuf, (__cg_pattern-__ovly_return+4)(retval)
- rotqbyi osize, vma, 4
- rotqbyi sz, vma, 8
- lqa ea64, _EAR_
-
+#nop
+ rotqbyi sz, vma, 8 # 1,4 0
+#nop
+ rotqbyi osize, vma, 4 # 1,4 1
+#nop
+ lqa ea64, _EAR_ # 1,6 2
+#nop
+ lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3
+
+/* We could predict the branch at the end of this loop by adding a few
+ instructions, and there are plenty of free cycles to do so without
+ impacting loop execution time. However, it doesn't make a great
+ deal of sense since we need to wait for the dma to complete anyway. */
__ovly_xfer_loop:
-/* 64b add to compute next ea64. */
- rotqmbyi off64, sz, -4
- cg cgbits, ea64, off64
- shufb add64, cgbits, cgbits, cgshuf
- addx add64, ea64, off64
- ori ea64, add64, 0
-
-/* Setup DMA parameters, then issue DMA request. */
- rotqbyi ealo, add64, 4
- ila maxsize, MFC_MAX_DMA_SIZE
- cgt cmp, osize, maxsize
- selb sz, osize, maxsize, cmp
- ila tagid, MFC_TAG_ID
- wrch $MFC_LSA, vma
- wrch $MFC_EAH, ea64
- wrch $MFC_EAL, ealo
- wrch $MFC_Size, sz
- wrch $MFC_TagId, tagid
- ila cmd, MFC_GET_CMD
- wrch $MFC_Cmd, cmd
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x
-//==============================================
-# save registers
- stqd $10, SQWM5($sp)
- stqd $11, SQWM6($sp)
- stqd $12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
- ila $10, __dma_event_format
- stqd $10, SQWM14($sp)
- ai $10, $sp, SQWM11
- stqd $10, SQWM13($sp)
- stqd $sp, SQWM12($sp)
- stqd vma, SQWM11($sp)
- stqd ea64, SQWM10($sp)
- stqd ealo, SQWM9($sp)
- stqd sz, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
- ila $10, 0x3ffff # address mask
- ilhu $11, SPE_C99_VPRINTF << 8
- ai $12, $sp, SQWM14 # parameter pointer
- selb $11, $11, $12, $10 # combine command & address ptr
- brsl $10, next3a
-next3a:
- .type next3a, @function
- lqr $12, message3a
- cwd $10, message3a-next3a($10)
- shufb $11, $11, $12, $10 # insert msg into inst word
- stqr $11, message3a # store cmd/ptr into msg word
- dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
- stop 0x2100
-message3a:
- .word 0
-
-# restore registers
- lqd $12, SQWM7($sp)
- lqd $11, SQWM6($sp)
- lqd $10, SQWM5($sp)
-//==============================================
-#endif
-
-/* Increment vma, decrement size, branch back as needed. */
- a vma, vma, sz
- sf osize, sz, osize
- brnz osize, __ovly_xfer_loop
-
-/* Save app's tagmask, wait for DMA complete, restore mask. */
- rdch oldmask, $MFC_RdTagMask
+#nop
+ rotqmbyi off64, sz, -4 # 1,4 4
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ cg cgbits, ea64, off64 # 0,2 8
+#lnop
+#nop; lnop
+#nop
+ shufb add64, cgbits, cgbits, cgshuf # 1,4 10
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ addx add64, ea64, off64 # 0,2 14
+#lnop
+ ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
+ lnop
+ ori ea64, add64, 0 # 0,2 16
+ rotqbyi ealo, add64, 4 # 1,4 16
+ cgt cmp, osize, maxsize # 0,2 17
+ wrch $MFC_LSA, vma # 1,6 17
+#nop; lnop
+ selb sz, osize, maxsize, cmp # 0,2 19
+ wrch $MFC_EAH, ea64 # 1,6 19
+ ila tagid, MFC_TAG_ID # 0,2 20
+ wrch $MFC_EAL, ealo # 1,6 20
+ ila cmd, MFC_GET_CMD # 0,2 21
+ wrch $MFC_Size, sz # 1,6 21
+ sf osize, sz, osize # 0,2 22
+ wrch $MFC_TagId, tagid # 1,6 22
+ a vma, vma, sz # 0,2 23
+ wrch $MFC_Cmd, cmd # 1,6 23
+#nop
+ brnz osize, __ovly_xfer_loop # 1,4 24
+
+/* Now update our data structures while waiting for DMA to complete.
+ Low bit of .buf needs to be cleared on the _ovly_table entry
+ corresponding to the evicted overlay, and set on the entry for the
+ newly loaded overlay. Note that no overlay may in fact be evicted
+ as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry
+ for zero index! Also of course update the _ovly_buf_table entry. */
+#nop
+ lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ shli off3, newovl, 4 # 0,4 31
+#lnop
+ ila tab3, _ovly_table - 16 # 0,2 32
+#lnop
+#nop
+ fsmbi pbyte, 1 # 1,4 33
+#nop; lnop
+#nop
+ lqx vma, tab3, off3 # 1,6 35
+#nop; lnop
+ andi pbit, pbyte, 1 # 0,2 37
+ lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ or newvma, vma, pbit # 0,2 41
+ rotqbyi buf3, vma, 12 # 1,4 41
+#nop; lnop
+#nop
+ stqx newvma, tab3, off3 # 1,6 43
+#nop; lnop
+ shli off4, buf3, 2 # 1,4 45
+#lnop
+ ila tab4, _ovly_buf_table # 0,2 46
+#lnop
+#nop; lnop
+#nop; lnop
+#nop
+ lqx map, tab4, off4 # 1,6 49
+#nop
+ cwx genwi, tab4, off4 # 1,4 50
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+ rotqby oldovl, map, off4 # 1,4 55
+ nop
+ shufb newmap, newovl, map, genwi # 0,4 56
#if MFC_TAG_ID < 16
- ilh newmask, 1 << MFC_TAG_ID
+ ila newmask, 1 << MFC_TAG_ID # 0,2 57
#else
- ilhu newmask, 1 << (MFC_TAG_ID - 16)
-#endif
- wrch $MFC_WrTagMask, newmask
- ila tagstat, MFC_TAG_UPDATE_ALL
- wrch $MFC_WrTagUpdate, tagstat
- rdch tagstat, $MFC_RdTagStat
- sync
- wrch $MFC_WrTagMask, oldmask
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In debug_event_hook link-register=0x%08x %08x %08x %08x
-//==============================================
-# save registers
- stqd $10, SQWM5($sp)
- stqd $11, SQWM6($sp)
- stqd $12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
- ila $10, __debug_event_format
- stqd $10, SQWM14($sp)
- ai $10, $sp, SQWM11
- stqd $10, SQWM13($sp)
- stqd $sp, SQWM12($sp)
- stqd $lr, SQWM11($sp)
- rotqbyi $10, $lr, 4
- stqd $10, SQWM10($sp)
- rotqbyi $10, $10, 4
- stqd $10, SQWM9($sp)
- rotqbyi $10, $10, 4
- stqd $10, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
- ila $10, 0x3ffff # address mask
- ilhu $11, SPE_C99_VPRINTF << 8
- ai $12, $sp, SQWM14 # parameter pointer
- selb $11, $11, $12, $10 # combine command & address ptr
- brsl $10, next2a
-next2a:
- .type next2a, @function
- lqr $12, message2a
- cwd $10, message2a-next2a($10)
- shufb $11, $11, $12, $10 # insert msg into inst word
- stqr $11, message2a # store cmd/ptr into msg word
- dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
- stop 0x2100
-message2a:
- .word 0
-
-# save registers
- stqd $13, SQWM8($sp)
- stqd $14, SQWM9($sp)
- stqd $15, SQWM10($sp)
- stqd $16, SQWM11($sp)
-
-# initialize loop
- il $13, 1
- ila $14, _ovly_buf_table
- ila $15, _ovly_buf_table_end
-
-loop_start2:
-# Place input parameters onto the stack to form the
-# local storage memory image.
- ila $10, __ovly_buf_table_format
- stqd $10, SQWM16($sp)
- ai $10, $sp, SQWM13
- stqd $10, SQWM15($sp)
- stqd $sp, SQWM14($sp)
- stqd $13, SQWM13($sp)
- lqd $16, 0($14)
- rotqby $16, $16, $14
- stqd $16, SQWM12($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
- ila $10, 0x3ffff # address mask
- ilhu $11, SPE_C99_VPRINTF << 8
- ai $12, $sp, SQWM16 # parameter pointer
- selb $11, $11, $12, $10 # combine command & address ptr
- brsl $10, next2b
-next2b:
- .type next2b, @function
- lqr $12, message2b
- cwd $10, message2b-next2b($10)
- shufb $11, $11, $12, $10 # insert msg into inst word
- stqr $11, message2b # store cmd/ptr into msg word
- dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
- stop 0x2100
-message2b:
- .word 0
-
-# move to next entry
- ai $13, $13, 1
- ai $14, $14, 4
- clgt $16, $15, $14
- brnz $16, loop_start2
-
-# restore registers
- lqd $16, SQWM11($sp)
- lqd $15, SQWM10($sp)
- lqd $14, SQWM9($sp)
- lqd $13, SQWM8($sp)
- lqd $12, SQWM7($sp)
- lqd $11, SQWM6($sp)
- lqd $10, SQWM5($sp)
-//==============================================
+ ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
#endif
+#lnop
+#nop; lnop
+#nop; lnop
+ stqx newmap, tab4, off4 # 1,6 60
+
+/* Save app's tagmask, wait for DMA complete, restore mask. */
+ ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
+ rdch oldmask, $MFC_RdTagMask # 1,6 61
+#nop
+ wrch $MFC_WrTagMask, newmask # 1,6 62
+#nop
+ wrch $MFC_WrTagUpdate, tagstat # 1,6 63
+#nop
+ rdch tagstat, $MFC_RdTagStat # 1,6 64
+#nop
+ sync # 1,4 65
+/* Any hint prior to the sync is lost. A hint here allows the branch
+ to complete 15 cycles after the hint. With no hint the branch will
+ take 18 or 19 cycles. */
+ ila tab5, _ovly_table - 16 # 0,2 66
+ hbr do_load99, target # 1,15 66
+ shli off5, oldovl, 4 # 0,4 67
+ wrch $MFC_WrTagMask, oldmask # 1,6 67
+ ceqi zovl, oldovl, 0 # 0,2 68
+#lnop
+#nop; lnop
+#nop
+ fsm zovl, zovl # 1,4 70
+#nop
+ lqx oldvma, tab5, off5 # 1,6 71
+#nop
+ lqd save3, -48($sp) # 1,6 72
+#nop; lnop
+ andc pbit, pbit, zovl # 0,2 74
+ lqd save2, -32($sp) # 1,6 74
+#nop; lnop
+#nop; lnop
+ andc oldvma, oldvma, pbit # 0,2 77
+ lqd save1, -16($sp) # 1,6 77
+#nop; lnop
+ nop
+ stqx oldvma, tab5, off5 # 1,6 79
+#nop; lnop
- .global _ovly_debug_event
- .type _ovly_debug_event, @function
+ .global _ovly_debug_event
+ .type _ovly_debug_event, @function
_ovly_debug_event:
-/* GDB inserts debugger trap here. */
nop
-
-__ovly_load_restore:
-#ifdef OVLY_IRQ_SAVE
-/* Conditionally re-enable interrupts. */
- andi irq_stat, irq_stat, 1
- ila irqtmp, __ovly_irq_restore
- binze irq_stat, irqtmp
-__ovly_irq_restore:
- lqd $9, -64($sp)
-#endif
-
-/* Restore saved registers. */
- lqd $8, -48($sp)
- lqd $7, -32($sp)
- lqd $6, -16($sp)
-
-__ovly_load_ret:
/* Branch to target address. */
- bi $79
+do_load99:
+ bi target # 1,4 81
- .size __ovly_load, . - __ovly_load
+ .size __ovly_load, . - __ovly_load
diff --git a/ld/emultempl/spu_ovl.o b/ld/emultempl/spu_ovl.o
index a68eea3..d5b37e1 100644
--- a/ld/emultempl/spu_ovl.o
+++ b/ld/emultempl/spu_ovl.o
Binary files differ
diff --git a/ld/emultempl/spuelf.em b/ld/emultempl/spuelf.em
index e8333a4..7e618a5 100644
--- a/ld/emultempl/spuelf.em
+++ b/ld/emultempl/spuelf.em
@@ -58,8 +58,6 @@ static const struct _ovl_stream ovl_mgr_stream = {
ovl_mgr + sizeof (ovl_mgr)
};
-static asection *toe = NULL;
-
static int
is_spu_target (void)
@@ -84,7 +82,8 @@ spu_after_open (void)
gld${EMULATION_NAME}_after_open ();
}
-/* Add section S at the end of output section OUTPUT_NAME.
+/* If O is NULL, add section S at the end of output section OUTPUT_NAME.
+ If O is not NULL, add section S at the beginning of output section O.
Really, we should be duplicating ldlang.c map_input_to_output_sections
logic here, ie. using the linker script to find where the section
@@ -95,11 +94,11 @@ spu_after_open (void)
overlay manager code somewhere else. */
static void
-spu_place_special_section (asection *s, const char *output_name)
+spu_place_special_section (asection *s, asection *o, const char *output_name)
{
lang_output_section_statement_type *os;
- os = lang_output_section_find (output_name);
+ os = lang_output_section_find (o != NULL ? o->name : output_name);
if (os == NULL)
{
const char *save = s->name;
@@ -107,6 +106,15 @@ spu_place_special_section (asection *s, const char *output_name)
gld${EMULATION_NAME}_place_orphan (s);
s->name = save;
}
+ else if (o != NULL && os->children.head != NULL)
+ {
+ lang_statement_list_type add;
+
+ lang_list_init (&add);
+ lang_add_section (&add, s, os);
+ *add.tail = os->children.head;
+ os->children.head = add.head;
+ }
else
lang_add_section (&os->children, s, os);
@@ -154,7 +162,7 @@ spu_elf_load_ovl_mgr (void)
for (in = ovl_is->the_bfd->sections; in != NULL; in = in->next)
if ((in->flags & (SEC_ALLOC | SEC_LOAD))
== (SEC_ALLOC | SEC_LOAD))
- spu_place_special_section (in, ".text");
+ spu_place_special_section (in, NULL, ".text");
}
}
@@ -164,7 +172,7 @@ spu_elf_load_ovl_mgr (void)
os = os->next)
if (os->bfd_section != NULL
&& spu_elf_section_data (os->bfd_section) != NULL
- && spu_elf_section_data (os->bfd_section)->ovl_index != 0)
+ && spu_elf_section_data (os->bfd_section)->u.o.ovl_index != 0)
{
if (os->bfd_section->alignment_power < 4)
os->bfd_section->alignment_power = 4;
@@ -192,20 +200,15 @@ spu_before_allocation (void)
/* Find overlays by inspecting section vmas. */
if (spu_elf_find_overlays (output_bfd, &link_info))
{
- asection *stub, *ovtab;
+ int ret;
- if (!spu_elf_size_stubs (output_bfd, &link_info, non_overlay_stubs,
- stack_analysis, &stub, &ovtab, &toe))
+ ret = spu_elf_size_stubs (output_bfd, &link_info,
+ spu_place_special_section,
+ non_overlay_stubs);
+ if (ret == 0)
einfo ("%X%P: can not size overlay stubs: %E\n");
-
- if (stub != NULL)
- {
- spu_place_special_section (stub, ".text");
- spu_place_special_section (ovtab, ".data");
- spu_place_special_section (toe, ".toe");
-
- spu_elf_load_ovl_mgr ();
- }
+ else if (ret == 2)
+ spu_elf_load_ovl_mgr ();
}
/* We must not cache anything from the preliminary sizing. */
@@ -235,10 +238,8 @@ gld${EMULATION_NAME}_finish (void)
einfo ("%X%P: %A exceeds local store range\n", s);
}
- if (toe != NULL
- && !spu_elf_build_stubs (&link_info,
- emit_stub_syms || link_info.emitrelocations,
- toe))
+ if (!spu_elf_build_stubs (&link_info,
+ emit_stub_syms || link_info.emitrelocations))
einfo ("%X%P: can not build overlay stubs: %E\n");
finish_default ();
diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog
index ec1018e..ed802de 100644
--- a/ld/testsuite/ChangeLog
+++ b/ld/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2008-01-28 Alan Modra <amodra@bigpond.net.au>
+
+ * ld-spu/ovl.d: Update.
+ * ld-spu/ovl2.d: Update.
+
2008-01-26 Alan Modra <amodra@bigpond.net.au>
* ld-elf/loadaddr.t: New, extracted from..
diff --git a/ld/testsuite/ld-spu/ovl.d b/ld/testsuite/ld-spu/ovl.d
index c624659..0cd8788 100644
--- a/ld/testsuite/ld-spu/ovl.d
+++ b/ld/testsuite/ld-spu/ovl.d
@@ -7,65 +7,90 @@
Disassembly of section \.text:
00000100 <_start>:
- 100: 1c f8 00 81 ai \$1,\$1,-32
- 104: 48 20 00 00 xor \$0,\$0,\$0
- 108: 24 00 00 80 stqd \$0,0\(\$1\)
- 10c: 24 00 40 80 stqd \$0,16\(\$1\)
- 110: 33 00 04 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130
- 110: SPU_REL16 f1_a1
- 114: 33 00 04 80 brsl \$0,138 <00000000\.ovl_call\.f2_a1> # 138
- 114: SPU_REL16 f2_a1
- 118: 33 00 07 00 brsl \$0,150 <00000000\.ovl_call\.f1_a2> # 150
- 118: SPU_REL16 f1_a2
- 11c: 42 00 ac 09 ila \$9,344 # 158
- 11c: SPU_ADDR18 f2_a2
- 120: 35 20 04 80 bisl \$0,\$9
- 124: 1c 08 00 81 ai \$1,\$1,32 # 20
- 128: 32 7f fb 00 br 100 <_start> # 100
- 128: SPU_REL16 _start
+.* ai \$1,\$1,-32
+.* xor \$0,\$0,\$0
+.* stqd \$0,0\(\$1\)
+.* stqd \$0,16\(\$1\)
+.* brsl \$0,.* <00000000\.ovl_call\.f1_a1>.*
+.*SPU_REL16 f1_a1
+.* brsl \$0,.* <00000000\.ovl_call\.f2_a1>.*
+.*SPU_REL16 f2_a1
+.* brsl \$0,.* <00000000\.ovl_call\.f1_a2>.*
+.*SPU_REL16 f1_a2
+#.* ila \$9,328 # 148
+.* ila \$9,352 # 160
+.*SPU_ADDR18 f2_a2
+.* bisl \$0,\$9
+.* ai \$1,\$1,32 # 20
+.* br 100 <_start> # 100
+.*SPU_REL16 _start
0000012c <f0>:
- 12c: 35 00 00 00 bi \$0
+.* bi \$0
+
+#00000130 <00000000\.ovl_call\.f1_a1>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 04 04 00.*
+#
+#00000138 <00000000\.ovl_call\.f2_a1>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 04 04 04.*
+#
+#00000140 <00000000\.ovl_call\.f1_a2>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 08 04 00.*
+#
+#00000148 <00000000\.ovl_call\.f2_a2>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 08 04 24.*
+#
+#00000150 <00000000\.ovl_call\.f4_a1>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 04 04 10.*
+#
+#00000158 <00000000.ovl_call.14:8>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 08 04 34.*
00000130 <00000000\.ovl_call\.f1_a1>:
- 130: 42 02 00 4f ila \$79,1024 # 400
- 134: 32 00 02 80 br 148 .*
- 134: SPU_REL16 \*ABS\*\+0x148
-
-00000138 <00000000\.ovl_call\.f2_a1>:
- 138: 42 02 02 4f ila \$79,1028 # 404
- 13c: 32 00 01 80 br 148 .*
- 13c: SPU_REL16 \*ABS\*\+0x148
-
-00000140 <00000000\.ovl_call\.f4_a1>:
- 140: 42 02 08 4f ila \$79,1040 # 410
- 144: 40 20 00 00 nop \$0
- 148: 42 00 00 ce ila \$78,1
- 14c: 32 00 0a 80 br 1a0 <__ovly_load> # 1a0
- 14c: SPU_REL16 __ovly_load
-
-00000150 <00000000\.ovl_call\.f1_a2>:
- 150: 42 02 00 4f ila \$79,1024 # 400
- 154: 32 00 02 80 br 168 .*
- 154: SPU_REL16 \*ABS\*\+0x168
-
-00000158 <00000000\.ovl_call\.f2_a2>:
- 158: 42 02 12 4f ila \$79,1060 # 424
- 15c: 32 00 01 80 br 168 .*
- 15c: SPU_REL16 \*ABS\*\+0x168
-
-00000160 <00000000\.ovl_call\.14:8>:
- 160: 42 02 1a 4f ila \$79,1076 # 434
- 164: 40 20 00 00 nop \$0
- 168: 42 00 01 4e ila \$78,2
- 16c: 32 00 06 80 br 1a0 <__ovly_load> # 1a0
- 16c: SPU_REL16 __ovly_load
+.* ila \$78,1
+.* lnop
+.* ila \$79,1024 # 400
+.* br .* <__ovly_load>.*
+
+00000140 <00000000\.ovl_call\.f2_a1>:
+.* ila \$78,1
+.* lnop
+.* ila \$79,1028 # 404
+.* br .* <__ovly_load>.*
+
+00000150 <00000000.ovl_call.f1_a2>:
+.* ila \$78,2
+.* lnop
+.* ila \$79,1024 # 400
+.* br .* <__ovly_load>.*
+
+00000160 <00000000\.ovl_call\.f2_a2>:
+.* ila \$78,2
+.* lnop
+.* ila \$79,1060 # 424
+.* br .* <__ovly_load>.*
+
+00000170 <00000000\.ovl_call\.f4_a1>:
+.* ila \$78,1
+.* lnop
+.* ila \$79,1040 # 410
+.* br .* <__ovly_load>.*
+
+00000180 <00000000.ovl_call.14:8>:
+.* ila \$78,2
+.* lnop
+.* ila \$79,1076 # 434
+.* br .* <__ovly_load>.*
+
#...
[0-9a-f]+ <__ovly_return>:
-[0-9a-f ]+: 3f e1 00 4e shlqbyi \$78,\$0,4
-[0-9a-f ]+: 3f e2 00 4f shlqbyi \$79,\$0,8
-[0-9a-f ]+: 25 00 27 ce biz \$78,\$79
-
+#...
[0-9a-f]+ <__ovly_load>:
#...
[0-9a-f]+ <_ovly_debug_event>:
@@ -73,67 +98,75 @@ Disassembly of section \.text:
Disassembly of section \.ov_a1:
00000400 <f1_a1>:
- 400: 32 00 01 80 br 40c <f3_a1> # 40c
- 400: SPU_REL16 f3_a1
+.* br .* <f3_a1>.*
+.*SPU_REL16 f3_a1
00000404 <f2_a1>:
- 404: 42 00 a0 03 ila \$3,320 # 140
- 404: SPU_ADDR18 f4_a1
- 408: 35 00 00 00 bi \$0
+#.* ila \$3,336 # 150
+.* ila \$3,368 # 170
+.*SPU_ADDR18 f4_a1
+.* bi \$0
0000040c <f3_a1>:
- 40c: 35 00 00 00 bi \$0
+.* bi \$0
00000410 <f4_a1>:
- 410: 35 00 00 00 bi \$0
+.* bi \$0
\.\.\.
Disassembly of section \.ov_a2:
00000400 <f1_a2>:
- 400: 24 00 40 80 stqd \$0,16\(\$1\)
- 404: 24 ff 80 81 stqd \$1,-32\(\$1\)
- 408: 1c f8 00 81 ai \$1,\$1,-32
- 40c: 33 7f a4 00 brsl \$0,12c <f0> # 12c
- 40c: SPU_REL16 f0
- 410: 33 7f a4 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130
- 410: SPU_REL16 f1_a1
- 414: 33 00 03 80 brsl \$0,430 <f3_a2> # 430
- 414: SPU_REL16 f3_a2
- 418: 34 00 c0 80 lqd \$0,48\(\$1\) # 30
- 41c: 1c 08 00 81 ai \$1,\$1,32 # 20
- 420: 35 00 00 00 bi \$0
+.* stqd \$0,16\(\$1\)
+.* stqd \$1,-32\(\$1\)
+.* ai \$1,\$1,-32
+.* brsl \$0,12c <f0> # 12c
+.*SPU_REL16 f0
+.* brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130
+.*SPU_REL16 f1_a1
+.* brsl \$0,430 <f3_a2> # 430
+.*SPU_REL16 f3_a2
+.* lqd \$0,48\(\$1\) # 30
+.* ai \$1,\$1,32 # 20
+.* bi \$0
00000424 <f2_a2>:
- 424: 41 00 00 03 ilhu \$3,0
- 424: SPU_ADDR16_HI f4_a2
- 428: 60 80 b0 03 iohl \$3,352 # 160
- 428: SPU_ADDR16_LO f4_a2
- 42c: 35 00 00 00 bi \$0
+.* ilhu \$3,0
+.*SPU_ADDR16_HI f4_a2
+#.* iohl \$3,344 # 158
+.* iohl \$3,384 # 180
+.*SPU_ADDR16_LO f4_a2
+.* bi \$0
00000430 <f3_a2>:
- 430: 35 00 00 00 bi \$0
+.* bi \$0
00000434 <f4_a2>:
- 434: 32 7f ff 80 br 430 <f3_a2> # 430
- 434: SPU_REL16 f3_a2
+.* br .* <f3_a2>.*
+.*SPU_REL16 f3_a2
\.\.\.
Disassembly of section .data:
-00000440 <_ovly_table>:
- 440: 00 00 04 00 .*
- 444: 00 00 00 20 .*
- 448: 00 00 02 f0 .*
+00000440 <_ovly_table-0x10>:
+ \.\.\.
44c: 00 00 00 01 .*
+00000450 <_ovly_table>:
450: 00 00 04 00 .*
- 454: 00 00 00 40 .*
- 458: 00 00 03 10 .*
- 45c: 00 00 00 01 .*
+ 454: 00 00 00 20 .*
+# 458: 00 00 03 40 .*
+ 458: 00 00 03 70 .*
+ 45c: 00 00 00 02 .*
+ 460: 00 00 04 00 .*
+ 464: 00 00 00 40 .*
+# 468: 00 00 03 60 .*
+ 468: 00 00 03 90 .*
+ 46c: 00 00 00 02 .*
+
+00000470 <_ovly_buf_table>:
+ \.\.\.
-00000460 <_ovly_buf_table>:
- 460: 00 00 00 00 .*
Disassembly of section \.toe:
-00000470 <_EAR_>:
+00000480 <_EAR_>:
\.\.\.
Disassembly of section \.note\.spu_name:
diff --git a/ld/testsuite/ld-spu/ovl2.d b/ld/testsuite/ld-spu/ovl2.d
index bf62e03..1cd1d33 100644
--- a/ld/testsuite/ld-spu/ovl2.d
+++ b/ld/testsuite/ld-spu/ovl2.d
@@ -7,40 +7,50 @@
Disassembly of section \.text:
00000100 <_start>:
- 100: 33 00 06 00 brsl \$0,130 <00000000\.ovl_call\.f1_a1> # 130
- 100: SPU_REL16 f1_a1
- 104: 33 00 03 80 brsl \$0,120 <00000000\.ovl_call\.10:4> # 120
- 104: SPU_REL16 setjmp
- 108: 32 7f ff 00 br 100 <_start> # 100
- 108: SPU_REL16 _start
+.* brsl \$0,.* <00000000\.ovl_call\.f1_a1>.*
+.*SPU_REL16 f1_a1
+.* brsl \$0,.* <00000000\.ovl_call\.10:4>.*
+.*SPU_REL16 setjmp
+.* br 100 <_start> # 100
+.*SPU_REL16 _start
0000010c <setjmp>:
- 10c: 35 00 00 00 bi \$0
+.* bi \$0
00000110 <longjmp>:
- 110: 35 00 00 00 bi \$0
- ...
-
-00000120 <00000000\.ovl_call.10:4>:
- 120: 42 00 86 4f ila \$79,268 # 10c
- 124: 40 20 00 00 nop \$0
- 128: 42 00 00 4e ila \$78,0
- 12c: 32 00 0a 80 br 180 <__ovly_load> # 180
- 12c: SPU_REL16 __ovly_load
-
-00000130 <00000000\.ovl_call.f1_a1>:
- 130: 42 02 00 4f ila \$79,1024 # 400
- 134: 40 20 00 00 nop \$0
- 138: 42 00 00 ce ila \$78,1
- 13c: 32 00 08 80 br 180 <__ovly_load> # 180
- 13c: SPU_REL16 __ovly_load
+.* bi \$0
+.*
+
+#00000118 <00000000\.ovl_call.f1_a1>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 04 04 00.*
+#
+#00000120 <00000000\.ovl_call.10:4>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 00 01 0c.*
+#
+#00000128 <_SPUEAR_f1_a2>:
+#.* brsl \$75,.* <__ovly_load>.*
+#.*00 08 04 00.*
+
+00000120 <00000000\.ovl_call.f1_a1>:
+.* ila \$78,1
+.* lnop
+.* ila \$79,1024 # 400
+.* br .* <__ovly_load>.*
+
+00000130 <00000000\.ovl_call.10:4>:
+.* ila \$78,0
+.* lnop
+.* ila \$79,268 # 10c
+.* br .* <__ovly_load>.*
00000140 <_SPUEAR_f1_a2>:
- 140: 42 02 00 4f ila \$79,1024 # 400
- 144: 40 20 00 00 nop \$0
- 148: 42 00 01 4e ila \$78,2
- 14c: 32 00 06 80 br 180 <__ovly_load> # 180
- 14c: SPU_REL16 __ovly_load
+.* ila \$78,2
+.* lnop
+.* ila \$79,1024 # 400
+.* br .* <__ovly_load>.*
+
#...
Disassembly of section \.ov_a1:
@@ -55,21 +65,27 @@ Disassembly of section \.ov_a2:
\.\.\.
Disassembly of section \.data:
-00000410 <_ovly_table>:
- 410: 00 00 04 00 .*
- 414: 00 00 00 10 .*
- 418: 00 00 02 d0 .*
+00000410 <_ovly_table-0x10>:
+ \.\.\.
41c: 00 00 00 01 .*
+00000420 <_ovly_table>:
420: 00 00 04 00 .*
424: 00 00 00 10 .*
- 428: 00 00 02 e0 .*
- 42c: 00 00 00 01 .*
+# 428: 00 00 03 10 .*
+ 428: 00 00 03 30 .*
+ 42c: 00 00 00 02 .*
+ 430: 00 00 04 00 .*
+ 434: 00 00 00 10 .*
+# 438: 00 00 03 20 .*
+ 438: 00 00 03 40 .*
+ 43c: 00 00 00 02 .*
+
+00000440 <_ovly_buf_table>:
+ \.\.\.
-00000430 <_ovly_buf_table>:
- 430: 00 00 00 00 .*
Disassembly of section \.toe:
-00000440 <_EAR_>:
+00000450 <_EAR_>:
\.\.\.
Disassembly of section \.note\.spu_name: