aboutsummaryrefslogtreecommitdiff
path: root/ld/emultempl/spu_ovl.S
blob: 0f1064bdaba6d2de0a72d01a2683c9ce8f297647 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
/* Overlay manager for SPU.

   Copyright 2006, 2007 Free Software Foundation, Inc.

   This file is part of GLD, the Gnu Linker.

   GLD is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GLD is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GLD; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/**
 * MFC DMA definitions.
 *
 * MFC_GET_CMD		command word written to $MFC_Cmd for every transfer.
 * MFC_MAX_DMA_SIZE	upper bound on the size of one transfer request;
 *			larger overlays are split across several requests.
 * MFC_TAG_UPDATE_ALL	value written to $MFC_WrTagUpdate before waiting
 *			on $MFC_RdTagStat.
 * MFC_TAG_ID		tag group used for all DMA issued by this file.
 */
#define MFC_GET_CMD		0x40
#define MFC_MAX_DMA_SIZE	0x4000
#define MFC_TAG_UPDATE_ALL	2
#define MFC_TAG_ID		0


/**
 * Temporary register allocations.
 *
 * Several names map onto the same register: each alias names the role
 * that register plays during one phase of __ovly_load, so take care
 * when reordering code that two aliases of one register are never
 * needed at the same time.
 *
 * $6, $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined) are saved to
 * the stack on entry to __ovly_load and reloaded before it exits.
 * $75-$78 are used without being saved in this file.
 */
#define tab		$75
#define cgbits		$75
#define add64		$75
#define ealo		$75
#define newmask		$75
#define tagstat		$75
#define bchn		$75
#define rv1		$75

#define off		$76
#define off64		$76
#define maxsize		$76
#define oldmask		$76
#define sz		$76
#define lnkr		$76
#define rv2		$76

#define cur		$77
#define cmp		$77
#define buf		$77
#define genwi		$77
#define tagid		$77
#define cmd		$77
#define rv3		$77

/* $78 carries the partition number into __ovly_load; it is reused as
 * the carry shuffle pattern once the partition number is no longer
 * needed. */
#define cgshuf		$78

#define vma		$6

#define map		$7
#define osize		$7
#define cmp2		$7

#define ea64		$8
#define retval		$8

#ifdef OVLY_IRQ_SAVE
/* irqtmp shares $8 with ea64/retval; irq_stat must stay live from
 * entry to exit of __ovly_load, so it gets a register of its own. */
#define irqtmp		$8
#define irq_stat	$9
#endif

/* Overlay tables, defined outside this file.  Their layouts are
 * described in the comments next to the loads in __ovly_load. */
	.extern		_ovly_table
	.extern		_ovly_buf_table

/* The two shufb control quadwords below live in .text, directly ahead
 * of __ovly_return, because __ovly_load addresses them with lqd
 * displacements computed relative to __ovly_return.  Do not move them
 * or insert anything between them and __ovly_return without rechecking
 * those displacements.
 * NOTE(review): the lqd accesses need the patterns to be quadword
 * aligned; this presumably relies on ".align 4" meaning 2^4 = 16 bytes
 * for this target -- confirm. */
	.text
	.align 		4
	.type		__rv_pattern, @object
	.size		__rv_pattern, 16
/* shufb control used to build the new $lr.  Result words are taken
 * from: first operand word 0, second operand word 3, first operand
 * word 0, second operand word 0. */
__rv_pattern:
	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
	.type		__cg_pattern, @object
	.size		__cg_pattern, 16
/* shufb control used in the 64-bit add: moves word 1 of the first
 * operand (the carry out of the low word) into word 0 and produces
 * zero bytes (selector 0x80) everywhere else. */
__cg_pattern:
	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080

/**
 * __ovly_return - stub for returning from overlay functions.
 *
 * __ovly_load rewrites $lr so that overlay functions return here.  The
 * rewritten $lr holds {__ovly_return, prev ovl ndx, caller return adr,
 * callee ovl ndx}; this stub moves words 1 and 2 into the preferred
 * slots of $78 and $79.
 *
 * inputs:
 *	$lr	link register
 *
 * outputs:
 *	$78	old partition number, to be reloaded
 *	$79	return address in old partition
 *
 * If the old partition number is zero the stub branches straight to
 * $79.  Otherwise execution falls through into __ovly_load, which
 * must therefore immediately follow this stub.
 */
	.global		__ovly_return
	.type		__ovly_return, @function

/* Pad word ahead of the entry point.
 * NOTE(review): the "+4" in the lqd displacements that __ovly_load
 * computes relative to __ovly_return appears to account for this
 * word -- confirm before changing either. */
	.word		0
__ovly_return:
	/* Shift the quadword left by 4 bytes: $lr word 1 -> $78 word 0. */
	shlqbyi		$78, $lr, 4
	/* Shift the quadword left by 8 bytes: $lr word 2 -> $79 word 0. */
	shlqbyi		$79, $lr, 8
	/* Branch to $79 when $78 is zero; else fall into __ovly_load. */
	biz		$78, $79
	.size		__ovly_return, . - __ovly_return

/**
 * __ovly_load - copy an overlay partition to local store.
 *
 * inputs:
 *	$78	partition number to be loaded (zero: target is not in
 *		an overlay, nothing is loaded).
 *	$79	branch target in new partition.
 *	$lr	link register, containing return addr.
 *
 * outputs:
 *	$lr	new link register, returning through __ovly_return.
 *
 * Copy a new overlay partition into local store, or return
 * immediately if the partition is already resident.  In every case
 * control leaves through "bi $79" at __ovly_load_ret.
 *
 * Also entered by fall-through from __ovly_return when the partition
 * being returned to is non-zero.
 */
	.global		__ovly_load
	.type		__ovly_load, @function

__ovly_load:
/* Save temporary registers to stack.  The save area lies below $sp,
 * which is not adjusted.
 * NOTE(review): these slots are read back at __ovly_load_restore,
 * after interrupts may have been re-enabled; confirm nothing that can
 * run in between writes below $sp. */
	stqd		$6, -16($sp)
	stqd		$7, -32($sp)
	stqd		$8, -48($sp)

#ifdef OVLY_IRQ_SAVE
/* Save irq state, then disable interrupts.  The status is read into
 * irq_stat first; "bid" then branches to the very next instruction
 * with interrupts disabled. */
	stqd		$9, -64($sp)
	ila		irqtmp, __ovly_irq_save
	rdch		irq_stat, $SPU_RdMachStat
	bid		irqtmp
__ovly_irq_save:
#endif

/* Set branch hint to overlay target: the branch at __ovly_load_ret
 * will go to the address in $79. */
	hbr		__ovly_load_ret, $79

/* Get caller's overlay index by back chaining through stack frames.
 * Loop until end of stack (back chain all-zeros) or
 * encountered a link register we set here. */
	lqd		bchn, 0($sp)
	ila		retval, __ovly_return

__ovly_backchain_loop:
	/* lnkr = quadword at offset 16 of the frame bchn points to
	 * (compared below against __ovly_return); then step bchn to the
	 * next frame.  cmp is set when lnkr matches __ovly_return, cmp2
	 * when the new back chain is zero. */
	lqd		lnkr, 16(bchn)
	lqd		bchn, 0(bchn)
	ceq		cmp, lnkr, retval
	ceqi		cmp2, bchn, 0
	or		cmp, cmp, cmp2	
	brz		cmp, __ovly_backchain_loop

/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
 * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain is zero" mask from word 0 of cmp2 into word 3. */
	rotqbyi		cmp2, cmp2, 4
	andc		lnkr, lnkr, cmp2

/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.
 * rv2 supplies words 0-1 (retval word 0, lnkr word 3) and rv3 supplies
 * words 2-3 ($lr word 0, $78 word 0); the fsmbi mask selects bytes
 * 8-15 from rv3. */
	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)
	shufb		rv2, retval, lnkr, rv1
	shufb		rv3, $lr, $78, rv1
	fsmbi		rv1, 0xff
	selb		$lr, rv2, rv3, rv1

/* Branch to $79 if non-overlay */
	brz		$78, __ovly_load_restore

/* Load values from _ovly_table[$78].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * Entries are 16 bytes and partition numbers start at 1, hence the
 * shift by 4 and the "- 16" bias on the table base.  After the load,
 * vma holds the whole entry with the vma field in word 0; buf is
 * rotated into word 0 of its own register.
 */
	shli		off, $78, 4
	ila		tab, _ovly_table - 16
	lqx		vma, tab, off
	rotqbyi		buf, vma, 12

/* Load values from _ovly_buf_table[buf].
 *	extern struct {
 *		u32 mapped;
 *	} _ovly_buf_table[];
 *
 * Entries are 4 bytes and buf is 1-based, so off = (buf - 1) * 4.
 * lqx fetches the quadword containing the entry and rotqby brings the
 * entry into word 0 of cur.
 * NOTE(review): the rotate count is taken from off alone, not from
 * tab + off, which appears to assume _ovly_buf_table is quadword
 * aligned -- confirm.
 */
	ila		tab, _ovly_buf_table
	ai		off, buf, -1
	shli		off, off, 2
	lqx		map, tab, off
	rotqby		cur, map, off

/* Branch to $79 now if overlay is already mapped.  */
	ceq		cmp, $78, cur
	brnz		cmp, __ovly_load_restore

/* Marker for profiling code.  If we get here, we are about to load
 * a new overlay.
 */
	.global		__ovly_load_event
	.type		__ovly_load_event, @function
__ovly_load_event:

/* Set _ovly_buf_table[buf].mapped = $78.  cwx builds the word-insert
 * control for address tab + off; shufb merges $78 into the quadword
 * loaded above, which is then written back. */
	cwx		genwi, tab, off
	shufb		map, $78, map, genwi
	stqx		map, tab, off

/* A new partition needs to be loaded. Prepare for DMA loop.
 * _EAR_ is the 64b base EA, filled in at run time by the
 * loader, and indicating the value for SPU executable image start.
 *
 * From here on $78 holds the carry shuffle pattern, not the partition
 * number.  osize = entry size field; sz starts out as the entry
 * file_offset field so that the first pass of the loop below adds the
 * file offset to the base EA.
 */
	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
	rotqbyi		osize, vma, 4
	rotqbyi		sz, vma, 8
	lqa		ea64, _EAR_

__ovly_xfer_loop:
/* 64b add to compute next ea64.  off64 = {0, sz} (sz moved into
 * word 1); cg produces the per-word carries, the shuffle moves the
 * carry out of the low word into word 0, and addx adds it into the
 * high word.  On later passes sz is the size of the previous
 * transfer, so ea64 advances by the amount already requested. */
	rotqmbyi	off64, sz, -4
	cg		cgbits, ea64, off64
	shufb		add64, cgbits, cgbits, cgshuf
	addx		add64, ea64, off64
	ori		ea64, add64, 0

/* Setup DMA parameters, then issue DMA request.  ealo = low word of
 * the EA in word 0; sz = min (osize, MFC_MAX_DMA_SIZE). */
	rotqbyi		ealo, add64, 4
	ila		maxsize, MFC_MAX_DMA_SIZE
	cgt		cmp, osize, maxsize
	selb		sz, osize, maxsize, cmp
	ila		tagid, MFC_TAG_ID
	wrch		$MFC_LSA, vma
	wrch		$MFC_EAH, ea64
	wrch		$MFC_EAL, ealo
	wrch		$MFC_Size, sz
	wrch		$MFC_TagId, tagid
	ila		cmd, MFC_GET_CMD
	wrch		$MFC_Cmd, cmd

/* Increment vma, decrement size, branch back as needed. */
	a		vma, vma, sz
	sf		osize, sz, osize
	brnz		osize, __ovly_xfer_loop

/* Save app's tagmask, wait for DMA complete, restore mask.
 * NOTE(review): ilh replicates its immediate into every halfword, so
 * the word written to $MFC_WrTagMask would also have the bit for tag
 * MFC_TAG_ID + 16 set -- confirm whether waiting on that extra tag
 * group is intended. */
	rdch		oldmask, $MFC_RdTagMask
#if MFC_TAG_ID < 16
	ilh		newmask, 1 << MFC_TAG_ID
#else
	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
#endif
	wrch		$MFC_WrTagMask, newmask
	ila		tagstat, MFC_TAG_UPDATE_ALL
	wrch		$MFC_WrTagUpdate, tagstat
	rdch		tagstat, $MFC_RdTagStat
	/* NOTE(review): presumably ensures that instructions fetched
	 * after this point come from the freshly loaded overlay --
	 * confirm against the ISA description of sync. */
	sync
	wrch		$MFC_WrTagMask, oldmask

	.global		_ovly_debug_event
	.type		_ovly_debug_event, @function
_ovly_debug_event:
/* GDB inserts debugger trap here.  */
	nop

__ovly_load_restore:
#ifdef OVLY_IRQ_SAVE
/* Conditionally re-enable interrupts.  Only the low bit of the status
 * read on entry is kept; when it is set, "binze" branches to the next
 * instruction with interrupts enabled, otherwise execution simply
 * falls through with interrupts still disabled.  irqtmp ($8) is
 * clobbered here and reloaded just below. */
	andi		irq_stat, irq_stat, 1
	ila		irqtmp, __ovly_irq_restore
	binze		irq_stat, irqtmp
__ovly_irq_restore:
	lqd		$9, -64($sp)
#endif

/* Restore saved registers. */
	lqd		$8, -48($sp)
	lqd		$7, -32($sp)
	lqd		$6, -16($sp)

__ovly_load_ret:
/* Branch to target address.  This is the branch named by the hbr
 * hint near the top of the function. */
	bi		$79

	.size		__ovly_load, . - __ovly_load