diff options
-rw-r--r-- | bfd/ChangeLog | 7 | ||||
-rw-r--r-- | bfd/elf32-spu.c | 12 | ||||
-rw-r--r-- | ld/ChangeLog | 8 | ||||
-rw-r--r-- | ld/emultempl/spu_ovl.S | 55 | ||||
-rw-r--r-- | ld/emultempl/spu_ovl.o | bin | 1524 -> 1500 bytes |
5 files changed, 50 insertions, 32 deletions
diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 6c9e799..6c11aec 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,10 @@ +2008-02-07 Alan Modra <amodra@bigpond.net.au> + + * elf32-spu.c (spu_elf_size_stubs): Revert 2008-01-28 doubling + of _ovly_buf_table size. + (spu_elf_build_stubs): Use low bit of .size as "present" bit. + Adjust initialisations relating to _ovly_buf_table. + 2008-02-04 Bob Wilson <bob.wilson@acm.org> * elf32-xtensa (elf_xtensa_relocate_section): After finding an invalid diff --git a/bfd/elf32-spu.c b/bfd/elf32-spu.c index 04de774..826512f 100644 --- a/bfd/elf32-spu.c +++ b/bfd/elf32-spu.c @@ -1202,7 +1202,7 @@ spu_elf_size_stubs (bfd *output_bfd, || !bfd_set_section_alignment (ibfd, htab->ovtab, 4)) return 0; - htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; + htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 4; (*place_spu_section) (htab->ovtab, NULL, ".data"); htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC); @@ -1373,8 +1373,8 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms) /* Write out _ovly_table. */ p = htab->ovtab->contents; - /* set low bit of .buf to mark non-overlay area as present. */ - p[15] = 1; + /* set low bit of .size to mark non-overlay area as present. */ + p[7] = 1; for (s = obfd->sections; s != NULL; s = s->next) { unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index; @@ -1387,7 +1387,7 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms) bfd_put_32 (htab->ovtab->owner, s->vma, p + off); bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4); /* file_off written later in spu_elf_modify_program_headers. 
*/ - bfd_put_32 (htab->ovtab->owner, ovl_buf * 2, p + off + 12); + bfd_put_32 (htab->ovtab->owner, ovl_buf, p + off + 12); } } @@ -1407,12 +1407,12 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms) if (h == NULL) return FALSE; h->root.u.def.value = htab->num_overlays * 16 + 16; - h->size = htab->num_buf * 2 * 4; + h->size = htab->num_buf * 4; h = define_ovtab_symbol (htab, "_ovly_buf_table_end"); if (h == NULL) return FALSE; - h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; + h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 4; h->size = 0; h = define_ovtab_symbol (htab, "_EAR_"); diff --git a/ld/ChangeLog b/ld/ChangeLog index 06bde56..3407aa6 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,11 @@ +2008-02-07 Alan Modra <amodra@bigpond.net.au> + + * emultempl/spu_ovl.S: Use low bit of _ovly_table.size as + a "present" bit rather than low bit of .buf. Correct indexing + into _ovly_buf_table. Use relative loads and stores to access + overlay manager local vars. + * emultempl/spu_ovl.o: Regenerate. + 2008-02-04 H.J. Lu <hongjiu.lu@intel.com> PR 5715 diff --git a/ld/emultempl/spu_ovl.S b/ld/emultempl/spu_ovl.S index 3f9c83b..92304b2 100644 --- a/ld/emultempl/spu_ovl.S +++ b/ld/emultempl/spu_ovl.S @@ -1,6 +1,6 @@ /* Overlay manager for SPU. - Copyright 2006, 2007 Free Software Foundation, Inc. + Copyright 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of the GNU Binutils. 
@@ -46,12 +46,13 @@ #define cgbits reserved2 #define off3 reserved2 #define off4 reserved2 +#define addr4 reserved2 #define off5 reserved2 #define tagstat reserved2 #define reserved3 $77 -#define buf1 reserved3 -#define buf2 reserved3 +#define size1 reserved3 +#define size2 reserved3 #define rv3 reserved3 #define ealo reserved3 #define cmd reserved3 @@ -145,18 +146,18 @@ __ovly_return: #nop; lnop #nop; lnop #nop - rotqbyi buf1, vma, 12 # 1,4 14 + rotqbyi size1, vma, 4 # 1,4 14 #nop stqd save3, -48($sp) # 1,6 15 #nop stqd save2, -32($sp) # 1,6 16 #nop stqd save1, -16($sp) # 1,6 17 - andi present1, buf1, 1 # 0,2 18 - stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18 + andi present1, size1, 1 # 0,2 18 + stqr ovl, __ovly_current # 1,6 18 #nop; lnop #nop - brz present1, __ovly_load_event # 1,4 20 + brz present1, do_load # 1,4 20 ovly_ret9: #nop bi target # 1,4 21 @@ -197,11 +198,11 @@ __ovly_load: #lnop #nop; lnop #nop - lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2 + lqr cur, __ovly_current # 1,6 2 shli off2, ovl, 4 # 0,4 3 - stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3 + stqr ovl, __ovly_current # 1,6 3 ceq rv2, $lr, rv1 # 0,2 4 - lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 + lqr rv3, __rv_pattern # 1,6 4 #nop; lnop #nop; lnop #nop @@ -214,11 +215,11 @@ __ovly_load: ila rv1, __ovly_return # 0,2 1 stqd save2, -32($sp) # 1,6 1 shli off2, ovl, 4 # 0,4 2 - lqa cur, __ovly_current # 1,6 2 + lqr cur, __ovly_current # 1,6 2 nop - stqa ovl, __ovly_current # 1,6 3 + stqr ovl, __ovly_current # 1,6 3 ceq rv2, $lr, rv1 # 0,2 4 - lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 + lqr rv3, __rv_pattern # 1,6 4 #nop hbr ovly_load9, target # 1,15 5 #nop @@ -237,18 +238,18 @@ __ovly_load: #nop rotqmbyi rv6, $lr, -8 # 1,4 12 #nop - rotqbyi buf2, vma, 12 # 1,4 13 + rotqbyi size2, vma, 4 # 1,4 13 #nop lqd save3, -48($sp) # 1,6 14 #nop; lnop or rv7, rv4, rv6 # 0,2 16 lqd save2, -32($sp) # 1,6 16 - andi present2, buf2, 1 # 0,2 17 + andi present2, 
size2, 1 # 0,2 17 lnop # 1,0 17 selb $lr, rv7, $lr, rv5 # 0,2 18 lqd save1, -16($sp) # 1,6 18 #nop - brz present2, __ovly_load_event # 1,4 19 + brz present2, do_load # 1,4 19 ovly_load9: #nop bi target # 1,4 20 @@ -266,6 +267,7 @@ ovly_load9: .global __ovly_load_event .type __ovly_load_event, @function __ovly_load_event: +do_load: #nop rotqbyi sz, vma, 8 # 1,4 0 #nop @@ -273,7 +275,7 @@ __ovly_load_event: #nop lqa ea64, _EAR_ # 1,6 2 #nop - lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3 + lqr cgshuf, __cg_pattern # 1,6 3 /* We could predict the branch at the end of this loop by adding a few instructions, and there are plenty of free cycles to do so without @@ -316,13 +318,13 @@ __ovly_xfer_loop: brnz osize, __ovly_xfer_loop # 1,4 24 /* Now update our data structions while waiting for DMA to complete. - Low bit of .buf needs to be cleared on the _ovly_table entry + Low bit of .size needs to be cleared on the _ovly_table entry corresponding to the evicted overlay, and set on the entry for the newly loaded overlay. Note that no overlay may in fact be evicted - as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry + as _ovly_buf_table[] starts with all zeros. Don't zap .size entry for zero index! Also of course update the _ovly_buf_table entry. 
*/ #nop - lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25 + lqr newovl, __ovly_current # 1,6 25 #nop; lnop #nop; lnop #nop; lnop @@ -333,7 +335,7 @@ __ovly_xfer_loop: ila tab3, _ovly_table - 16 # 0,2 32 #lnop #nop - fsmbi pbyte, 1 # 1,4 33 + fsmbi pbyte, 0x100 # 1,4 33 #nop; lnop #nop lqx vma, tab3, off3 # 1,6 35 @@ -351,7 +353,7 @@ __ovly_xfer_loop: #nop; lnop shli off4, buf3, 2 # 1,4 45 #lnop - ila tab4, _ovly_buf_table # 0,2 46 + ila tab4, _ovly_buf_table - 4 # 0,2 46 #lnop #nop; lnop #nop; lnop @@ -359,13 +361,14 @@ __ovly_xfer_loop: lqx map, tab4, off4 # 1,6 49 #nop cwx genwi, tab4, off4 # 1,4 50 -#nop; lnop + a addr4, tab4, off4 # 0,2 51 +#lnop #nop; lnop #nop; lnop #nop; lnop #nop - rotqby oldovl, map, off4 # 1,4 55 - nop + rotqby oldovl, map, addr4 # 1,4 55 +#nop shufb newmap, newovl, map, genwi # 0,4 56 #if MFC_TAG_ID < 16 ila newmask, 1 << MFC_TAG_ID # 0,2 57 @@ -375,7 +378,7 @@ __ovly_xfer_loop: #lnop #nop; lnop #nop; lnop - stqx newmap, tab4, off4 # 1,6 60 + stqd newmap, 0(addr4) # 1,6 60 /* Save app's tagmask, wait for DMA complete, restore mask. */ ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61 diff --git a/ld/emultempl/spu_ovl.o b/ld/emultempl/spu_ovl.o Binary files differ index d5b37e1..4fe2b25 100644 --- a/ld/emultempl/spu_ovl.o +++ b/ld/emultempl/spu_ovl.o |