/* Overlay manager for SPU.

   Copyright 2006, 2007 Free Software Foundation, Inc.

   This file is part of the GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/**
 * MFC DMA defn's.
 */
#define	MFC_GET_CMD		0x40
#define	MFC_MAX_DMA_SIZE	0x4000
#define	MFC_TAG_UPDATE_ALL	2
#define	MFC_TAG_ID		0

/**
 * Temporary register allocations.
 * These are saved/restored here.
 *
 * Note: several names alias the same physical register; each alias is
 * only live over a short stretch of __ovly_load.  Of the registers
 * below, __ovly_load spills and reloads only $6-$8 (plus $9 when
 * OVLY_IRQ_SAVE is defined); $75-$78 are used without being saved.
 * cgshuf shares $78 with the incoming partition number and is loaded
 * only after the last read of that input.
 */
#define	tab		$75
#define	cgbits		$75
#define	add64		$75
#define	ealo		$75
#define	newmask		$75
#define	tagstat		$75
#define	bchn		$75
#define	rv1		$75

#define	off		$76
#define	off64		$76
#define	maxsize		$76
#define	oldmask		$76
#define	sz		$76
#define	lnkr		$76
#define	rv2		$76

#define	cur		$77
#define	cmp		$77
#define	buf		$77
#define	genwi		$77
#define	tagid		$77
#define	cmd		$77
#define	rv3		$77

#define	cgshuf		$78

#define	vma		$6

#define	map		$7
#define	osize		$7
#define	cmp2		$7

#define	ea64		$8
#define	retval		$8

#ifdef OVLY_IRQ_SAVE
#define	irqtmp		$8
#define	irq_stat	$9
#endif

/* Stack quadword minus N: byte offset of the Nth quadword below $sp.  */
#define	SQWM1	-16*1
#define	SQWM2	-16*2
#define	SQWM3	-16*3
#define	SQWM4	-16*4
#define	SQWM5	-16*5
#define	SQWM6	-16*6
#define	SQWM7	-16*7
#define	SQWM8	-16*8
#define	SQWM9	-16*9
#define	SQWM10	-16*10
#define	SQWM11	-16*11
#define	SQWM12	-16*12
#define	SQWM13	-16*13
#define	SQWM14	-16*14
#define	SQWM15	-16*15
#define	SQWM16	-16*16

	.extern		_ovly_table
	.extern		_ovly_buf_table

#ifdef OVLY_PRINTFS
#define	SPE_C99_VPRINTF	37

/* Format strings for the debug trace messages issued from __ovly_load.  */
__entry_event_format:
	.string		"In entry_event_hook segment=0x%08x entry-address=0x%08x\n"
__debug_event_format:
	.string		"In debug_event_hook link-register=0x%08x %08x %08x %08x\n"
__dma_event_format:
	.string		"In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n"
__ovly_buf_table_format:
	.string		"_ovly_buf_table[%08x]=%08x\n"
#endif

	.text
	.align		4

/* Shuffle control used by __ovly_load to assemble the new link
   register quadword.  */
	.type		__rv_pattern, @object
	.size		__rv_pattern, 16
__rv_pattern:
	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213

/* Shuffle control used by the 64-bit effective-address add in the DMA
   loop: it positions the carry bits generated by cg for addx.  */
	.type		__cg_pattern, @object
	.size		__cg_pattern, 16
__cg_pattern:
	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080

/**
 * __ovly_return - stub for returning from overlay functions.
 *
 * inputs:
 *	$lr	link register
 *
 * outputs:
 *	$78	old partition number, to be reloaded
 *	$79	return address in old partition number
 *
 * If the old partition number is zero the stub branches straight to
 * $79; otherwise execution falls through into __ovly_load, which
 * immediately follows.
 */
	.global		__ovly_return
	.type		__ovly_return, @function

/* Pad word.  Together with the two 16-byte patterns above it puts
   __ovly_return 4 bytes past the start of the patterns' alignment unit
   (presumably a quadword boundary, if .align 4 means 16 bytes here --
   confirm).  The lqd displacements of the form
   (pattern - __ovly_return + 4) used in __ovly_load depend on this
   spacing.  */
	.word		0
__ovly_return:
	shlqbyi		$78, $lr, 4	/* word 1 of $lr -> preferred slot  */
	shlqbyi		$79, $lr, 8	/* word 2 of $lr -> preferred slot  */
	biz		$78, $79
	.size		__ovly_return, . - __ovly_return

/**
 * __ovly_load - copy an overlay partition to local store.
 *
 * inputs:
 *	$78	partition number to be loaded.
 *	$79	branch target in new partition.
 *	$lr	link register, containing return addr.
 *
 * outputs:
 *	$lr	new link register, returning through __ovly_return.
 *
 * Copy a new overlay partition into local store, or return
 * immediately if the partition is already resident.
 *
 * Scratch storage is taken from the quadwords just below $sp
 * (SQWM1 and down) without moving $sp.
 */
	.global		__ovly_load
	.type		__ovly_load, @function

__ovly_load:
/* Save temporary registers to stack.  */
	stqd		$6, -16($sp)
	stqd		$7, -32($sp)
	stqd		$8, -48($sp)

#ifdef OVLY_IRQ_SAVE
/* Save irq state, then disable interrupts.  */
	stqd		$9, -64($sp)
	ila		irqtmp, __ovly_irq_save
	rdch		irq_stat, $SPU_RdMachStat
	bid		irqtmp
__ovly_irq_save:
#endif

#ifdef OVLY_PRINTFS
/*============================================== */
/* In entry_event_hook segment=0x%08x entry-address=0x%08x */
/*============================================== */
/* save registers */
	stqd		$10, SQWM5($sp)
	stqd		$11, SQWM6($sp)
	stqd		$12, SQWM7($sp)
/* Place input parameters onto the stack to form the
   local storage memory image.  */
	ila		$10, __entry_event_format
	stqd		$10, SQWM12($sp)
	ai		$10, $sp, SQWM9
	stqd		$10, SQWM11($sp)
	stqd		$sp, SQWM10($sp)
	stqd		$78, SQWM9($sp)
	stqd		$79, SQWM8($sp)
/* Construct a message consisting of the 8-bit opcode
   and 24-bit local store pointer to the input
   parameters and place it following the stop and signal.
   (The mask below keeps the low 18 bits of the pointer.)  */
	ila		$10, 0x3ffff		/* address mask */
	ilhu		$11, SPE_C99_VPRINTF << 8
	ai		$12, $sp, SQWM12	/* parameter pointer */
	selb		$11, $11, $12, $10	/* combine command & address ptr */
	brsl		$10, next1a
next1a:
	.type		next1a, @function
	lqr		$12, message1a
	cwd		$10, message1a-next1a($10)
	shufb		$11, $11, $12, $10	/* insert msg into inst word */
	stqr		$11, message1a		/* store cmd/ptr into msg word */
	dsync
/* Notify the PPE to perform the assisted call request
   by issuing a stop and signal with a signal code
   of 0x2100 (C99 class).  */
	stop		0x2100
message1a:
	.word		0

/* save registers */
	stqd		$13, SQWM8($sp)
	stqd		$14, SQWM9($sp)
	stqd		$15, SQWM10($sp)
	stqd		$16, SQWM11($sp)

/* initialize loop: $13 = 1-based entry number, $14 = entry address,
   $15 = end address.  */
	il		$13, 1
	ila		$14, _ovly_buf_table
	ila		$15, _ovly_buf_table_end

loop_start1:
/* Place input parameters onto the stack to form the
   local storage memory image.  */
	ila		$10, __ovly_buf_table_format
	stqd		$10, SQWM16($sp)
	ai		$10, $sp, SQWM13
	stqd		$10, SQWM15($sp)
	stqd		$sp, SQWM14($sp)
	stqd		$13, SQWM13($sp)
	lqd		$16, 0($14)
	rotqby		$16, $16, $14
	stqd		$16, SQWM12($sp)
/* Construct a message consisting of the 8-bit opcode
   and 24-bit local store pointer to the input
   parameters and place it following the stop and signal.
   (The mask below keeps the low 18 bits of the pointer.)  */
	ila		$10, 0x3ffff		/* address mask */
	ilhu		$11, SPE_C99_VPRINTF << 8
	ai		$12, $sp, SQWM16	/* parameter pointer */
	selb		$11, $11, $12, $10	/* combine command & address ptr */
	brsl		$10, next1b
next1b:
	.type		next1b, @function
	lqr		$12, message1b
	cwd		$10, message1b-next1b($10)
	shufb		$11, $11, $12, $10	/* insert msg into inst word */
	stqr		$11, message1b		/* store cmd/ptr into msg word */
	dsync
/* Notify the PPE to perform the assisted call request
   by issuing a stop and signal with a signal code
   of 0x2100 (C99 class).  */
	stop		0x2100
message1b:
	.word		0

/* move to next entry */
	ai		$13, $13, 1
	ai		$14, $14, 4
	clgt		$16, $15, $14
	brnz		$16, loop_start1

/* restore registers */
	lqd		$16, SQWM11($sp)
	lqd		$15, SQWM10($sp)
	lqd		$14, SQWM9($sp)
	lqd		$13, SQWM8($sp)
	lqd		$12, SQWM7($sp)
	lqd		$11, SQWM6($sp)
	lqd		$10, SQWM5($sp)
/*============================================== */
#endif

/* Set branch hint to overlay target.  */
	hbr		__ovly_load_ret, $79

/* Get caller's overlay index by back chaining through stack frames.
 * Loop until end of stack (back chain all-zeros) or
 * encountered a link register we set here.  */
	lqd		bchn, 0($sp)
	ila		retval, __ovly_return

__ovly_backchain_loop:
	lqd		lnkr, 16(bchn)
	lqd		bchn, 0(bchn)
	ceq		cmp, lnkr, retval
	ceqi		cmp2, bchn, 0
	or		cmp, cmp, cmp2
	brz		cmp, __ovly_backchain_loop

/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
 * part of lnkr that we use later (slot 3).  */
	rotqbyi		cmp2, cmp2, 4
	andc		lnkr, lnkr, cmp2

/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.  */
	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)
	shufb		rv2, retval, lnkr, rv1
	shufb		rv3, $lr, $78, rv1
	fsmbi		rv1, 0xff
	selb		rv2, rv2, rv3, rv1
/* If we have a tail call from one overlay function to another overlay,
   then lr is already set up.  Don't change it.  */
	ceq		rv1, $lr, retval
	fsmb		rv1, rv1
	selb		$lr, rv2, $lr, rv1

/* Branch to $79 if non-overlay.  */
	brz		$78, __ovly_load_restore

/* Load values from _ovly_table[$78].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 * The table base is biased by -16, i.e. partition numbers are 1-based.
 */
	shli		off, $78, 4
	ila		tab, _ovly_table - 16
	lqx		vma, tab, off
	rotqbyi		buf, vma, 12

/* Load values from _ovly_buf_table[buf].
 *	extern struct {
 *		u32 mapped;
 *	} _ovly_buf_table[];
 * buf is 1-based as well, hence the -1 below.
 */
	ila		tab, _ovly_buf_table
	ai		off, buf, -1
	shli		off, off, 2
	lqx		map, tab, off
	rotqby		cur, map, off

/* Branch to $79 now if overlay is already mapped.  */
	ceq		cmp, $78, cur
	brnz		cmp, __ovly_load_restore

/* Marker for profiling code.  If we get here, we are about to load
 * a new overlay.
 */
	.global		__ovly_load_event
	.type		__ovly_load_event, @function
__ovly_load_event:

/* Set _ovly_buf_table[buf].mapped = $78.  */
	cwx		genwi, tab, off
	shufb		map, $78, map, genwi
	stqx		map, tab, off

/* A new partition needs to be loaded. Prepare for DMA loop.
 * _EAR_ is the 64b base EA, filled in at run time by the
 * loader, and indicating the value for SPU executable image start.
 *
 * cgshuf is $78: the partition number is not read again after this
 * point.  sz starts out holding file_offset so that the first pass of
 * the loop adds it to the base EA; on later passes sz holds the size
 * of the chunk just transferred.
 */
	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
	rotqbyi		osize, vma, 4
	rotqbyi		sz, vma, 8
	lqa		ea64, _EAR_

__ovly_xfer_loop:
/* 64b add to compute next ea64.  */
	rotqmbyi	off64, sz, -4
	cg		cgbits, ea64, off64
	shufb		add64, cgbits, cgbits, cgshuf
	addx		add64, ea64, off64
	ori		ea64, add64, 0

/* Setup DMA parameters, then issue DMA request.
   Each request moves min(osize, MFC_MAX_DMA_SIZE) bytes.  */
	rotqbyi		ealo, add64, 4
	ila		maxsize, MFC_MAX_DMA_SIZE
	cgt		cmp, osize, maxsize
	selb		sz, osize, maxsize, cmp
	ila		tagid, MFC_TAG_ID
	wrch		$MFC_LSA, vma
	wrch		$MFC_EAH, ea64
	wrch		$MFC_EAL, ealo
	wrch		$MFC_Size, sz
	wrch		$MFC_TagId, tagid
	ila		cmd, MFC_GET_CMD
	wrch		$MFC_Cmd, cmd

#ifdef OVLY_PRINTFS
/*============================================== */
/* In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x */
/*============================================== */
/* save registers */
	stqd		$10, SQWM5($sp)
	stqd		$11, SQWM6($sp)
	stqd		$12, SQWM7($sp)
/* Place input parameters onto the stack to form the
   local storage memory image.  */
	ila		$10, __dma_event_format
	stqd		$10, SQWM14($sp)
	ai		$10, $sp, SQWM11
	stqd		$10, SQWM13($sp)
	stqd		$sp, SQWM12($sp)
	stqd		vma, SQWM11($sp)
	stqd		ea64, SQWM10($sp)
	stqd		ealo, SQWM9($sp)
	stqd		sz, SQWM8($sp)
/* Construct a message consisting of the 8-bit opcode
   and 24-bit local store pointer to the input
   parameters and place it following the stop and signal.
   (The mask below keeps the low 18 bits of the pointer.)  */
	ila		$10, 0x3ffff		/* address mask */
	ilhu		$11, SPE_C99_VPRINTF << 8
	ai		$12, $sp, SQWM14	/* parameter pointer */
	selb		$11, $11, $12, $10	/* combine command & address ptr */
	brsl		$10, next3a
next3a:
	.type		next3a, @function
	lqr		$12, message3a
	cwd		$10, message3a-next3a($10)
	shufb		$11, $11, $12, $10	/* insert msg into inst word */
	stqr		$11, message3a		/* store cmd/ptr into msg word */
	dsync
/* Notify the PPE to perform the assisted call request
   by issuing a stop and signal with a signal code
   of 0x2100 (C99 class).  */
	stop		0x2100
message3a:
	.word		0

/* restore registers */
	lqd		$12, SQWM7($sp)
	lqd		$11, SQWM6($sp)
	lqd		$10, SQWM5($sp)
/*============================================== */
#endif

/* Increment vma, decrement size, branch back as needed.  */
	a		vma, vma, sz
	sf		osize, sz, osize
	brnz		osize, __ovly_xfer_loop

/* Save app's tagmask, wait for DMA complete, restore mask.
 *
 * The mask must select MFC_TAG_ID only.  ila zero-extends its
 * immediate into the word, giving exactly 1 << MFC_TAG_ID.  (ilh would
 * replicate the immediate into both halfwords of the word and so also
 * select tag group MFC_TAG_ID + 16, making this wait depend on the
 * application's own outstanding transfers in that group.)  */
	rdch		oldmask, $MFC_RdTagMask
#if MFC_TAG_ID < 16
	ila		newmask, 1 << MFC_TAG_ID
#else
	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
#endif
	wrch		$MFC_WrTagMask, newmask
	ila		tagstat, MFC_TAG_UPDATE_ALL
	wrch		$MFC_WrTagUpdate, tagstat
	rdch		tagstat, $MFC_RdTagStat
	sync
	wrch		$MFC_WrTagMask, oldmask

#ifdef OVLY_PRINTFS
/*============================================== */
/* In debug_event_hook link-register=0x%08x %08x %08x %08x */
/*============================================== */
/* save registers */
	stqd		$10, SQWM5($sp)
	stqd		$11, SQWM6($sp)
	stqd		$12, SQWM7($sp)
/* Place input parameters onto the stack to form the
   local storage memory image.  The four words of $lr are passed as
   four separate arguments.  */
	ila		$10, __debug_event_format
	stqd		$10, SQWM14($sp)
	ai		$10, $sp, SQWM11
	stqd		$10, SQWM13($sp)
	stqd		$sp, SQWM12($sp)
	stqd		$lr, SQWM11($sp)
	rotqbyi		$10, $lr, 4
	stqd		$10, SQWM10($sp)
	rotqbyi		$10, $10, 4
	stqd		$10, SQWM9($sp)
	rotqbyi		$10, $10, 4
	stqd		$10, SQWM8($sp)
/* Construct a message consisting of the 8-bit opcode
   and 24-bit local store pointer to the input
   parameters and place it following the stop and signal.
   (The mask below keeps the low 18 bits of the pointer.)  */
	ila		$10, 0x3ffff		/* address mask */
	ilhu		$11, SPE_C99_VPRINTF << 8
	ai		$12, $sp, SQWM14	/* parameter pointer */
	selb		$11, $11, $12, $10	/* combine command & address ptr */
	brsl		$10, next2a
next2a:
	.type		next2a, @function
	lqr		$12, message2a
	cwd		$10, message2a-next2a($10)
	shufb		$11, $11, $12, $10	/* insert msg into inst word */
	stqr		$11, message2a		/* store cmd/ptr into msg word */
	dsync
/* Notify the PPE to perform the assisted call request
   by issuing a stop and signal with a signal code
   of 0x2100 (C99 class).  */
	stop		0x2100
message2a:
	.word		0

/* save registers */
	stqd		$13, SQWM8($sp)
	stqd		$14, SQWM9($sp)
	stqd		$15, SQWM10($sp)
	stqd		$16, SQWM11($sp)

/* initialize loop: $13 = 1-based entry number, $14 = entry address,
   $15 = end address.  */
	il		$13, 1
	ila		$14, _ovly_buf_table
	ila		$15, _ovly_buf_table_end

loop_start2:
/* Place input parameters onto the stack to form the
   local storage memory image.  */
	ila		$10, __ovly_buf_table_format
	stqd		$10, SQWM16($sp)
	ai		$10, $sp, SQWM13
	stqd		$10, SQWM15($sp)
	stqd		$sp, SQWM14($sp)
	stqd		$13, SQWM13($sp)
	lqd		$16, 0($14)
	rotqby		$16, $16, $14
	stqd		$16, SQWM12($sp)
/* Construct a message consisting of the 8-bit opcode
   and 24-bit local store pointer to the input
   parameters and place it following the stop and signal.
   (The mask below keeps the low 18 bits of the pointer.)  */
	ila		$10, 0x3ffff		/* address mask */
	ilhu		$11, SPE_C99_VPRINTF << 8
	ai		$12, $sp, SQWM16	/* parameter pointer */
	selb		$11, $11, $12, $10	/* combine command & address ptr */
	brsl		$10, next2b
next2b:
	.type		next2b, @function
	lqr		$12, message2b
	cwd		$10, message2b-next2b($10)
	shufb		$11, $11, $12, $10	/* insert msg into inst word */
	stqr		$11, message2b		/* store cmd/ptr into msg word */
	dsync
/* Notify the PPE to perform the assisted call request
   by issuing a stop and signal with a signal code
   of 0x2100 (C99 class).  */
	stop		0x2100
message2b:
	.word		0

/* move to next entry */
	ai		$13, $13, 1
	ai		$14, $14, 4
	clgt		$16, $15, $14
	brnz		$16, loop_start2

/* restore registers */
	lqd		$16, SQWM11($sp)
	lqd		$15, SQWM10($sp)
	lqd		$14, SQWM9($sp)
	lqd		$13, SQWM8($sp)
	lqd		$12, SQWM7($sp)
	lqd		$11, SQWM6($sp)
	lqd		$10, SQWM5($sp)
/*============================================== */
#endif

	.global		_ovly_debug_event
	.type		_ovly_debug_event, @function
_ovly_debug_event:
/* GDB inserts debugger trap here.  */
	nop

__ovly_load_restore:
#ifdef OVLY_IRQ_SAVE
/* Conditionally re-enable interrupts: binze branches (enabling
   interrupts) only when bit 0 of the saved machine status is set;
   otherwise execution simply falls through to the same label.  */
	andi		irq_stat, irq_stat, 1
	ila		irqtmp, __ovly_irq_restore
	binze		irq_stat, irqtmp
__ovly_irq_restore:
	lqd		$9, -64($sp)
#endif

/* Restore saved registers.  */
	lqd		$8, -48($sp)
	lqd		$7, -32($sp)
	lqd		$6, -16($sp)

__ovly_load_ret:
/* Branch to target address.  */
	bi		$79

	.size		__ovly_load, . - __ovly_load