libgloss/mips/vr4300.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341

/*
 * vr4300.S -- CPU specific support routines
 *
 * Copyright (c) 1995,1996 Cygnus Support
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#ifndef __mips64
	.set mips3
#endif
#ifdef __mips16
/* This file contains 32 bit assembly code.  */
	.set nomips16
#endif

#include "regs.S"

	.text
	.align	2

	# Taken from "R4300 Preliminary RISC Processor Specification
	# Revision 2.0 January 1995" page 39: "The Count
	# register... increments at a constant rate... at one-half the
	# PClock speed."
	# We can use this fact to provide small polled delays.
	.globl	__cpu_timer_poll
	.ent	__cpu_timer_poll
__cpu_timer_poll:
	.set	noreorder
	# in:	a0 = (unsigned int) number of PClock ticks to wait for
	# out:	void

	# The Vr4300 counter updates at half PClock, so divide by 2 to
	# get counter delta:
	bnezl	a0, 1f		# continue if delta non-zero
	srl	a0, a0, 1	# divide ticks by 2		{DELAY SLOT}
	# perform a quick return to the caller:
	j	ra
	nop			#				{DELAY SLOT}
1:
	mfc0	v0, C0_COUNT	# get current counter value
	nop
	nop
	# We cannot just do the simple test, of adding our delta onto
	# the current value (ignoring overflow) and then checking for
	# equality. The counter is incrementing every two PClocks,
	# which means the counter value can change between
	# instructions, making it hard to sample at the exact value
	# desired.

	# However, we do know that our entry delta value is less than
	# half the number space (since we divide by 2 on entry). This
	# means we can use a difference in signs to indicate timer
	# overflow.
	addu	a0, v0, a0	# unsigned add (ignore overflow)
	# We know have our end value (which will have been
	# sign-extended to fill the 64bit register value).
2:
	# get current counter value:
	mfc0	v0, C0_COUNT
	nop
	nop
	# This is an unsigned 32bit subtraction:
	subu	v0, a0, v0	# delta = (end - now)		{DELAY SLOT}
	bgtzl	v0, 2b		# looping back is most likely
	nop
	# We have now been delayed (in the foreground) for AT LEAST
	# the required number of counter ticks.
	j	ra		# return to caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_timer_poll

	# Flush the processor caches to memory:

	.globl	__cpu_flush
	.ent	__cpu_flush
__cpu_flush:
	.set	noreorder
	# NOTE: The Vr4300 *CANNOT* have any secondary cache (bit 17
	# of the CONFIG registered is hard-wired to 1). We just
	# provide code to flush the Data and Instruction caches.

	# Even though the Vr4300 has hard-wired cache and cache line
	# sizes, we still interpret the relevant Config register
	# bits. This allows this code to be used for other conforming
	# MIPS architectures if desired.

	# Get the config register
	mfc0	a0, C0_CONFIG
	nop
	nop
	li	a1, 1		# a useful constant
	#
	srl	a2, a0, 9	# bits 11..9 for instruction cache size
	andi	a2, a2, 0x7	# 3bits of information
	add	a2, a2, 12	# get full power-of-2 value
	sllv	a2, a1, a2	# instruction cache size
	#
	srl	a3, a0, 6	# bits 8..6 for data cache size
	andi	a3, a3, 0x7	# 3bits of information
	add	a3, a3, 12	# get full power-of-2 value
	sllv	a3, a1, a3	# data cache size
	#
	li	a1, (1 << 5)	# check IB (instruction cache line size)
	and	a1, a0, a1	# mask against the CONFIG register value
	beqz	a1, 1f		# branch on result of delay slot operation
	nop
	li	a1, 32		# non-zero, then 32bytes
	j	2f		# continue
	nop
1:
	li	a1, 16		# 16bytes
2:
	#
	li	t0, (1 << 4)	# check DB (data cache line size)
	and	a0, a0, t0	# mask against the CONFIG register value
	beqz	a0, 3f		# branch on result of delay slot operation
	nop
	li	a0, 32		# non-zero, then 32bytes
	j	4f		# continue
	nop
3:
	li	a0, 16		# 16bytes
4:
	#
	# a0 = data cache line size
	# a1 = instruction cache line size
	# a2 = instruction cache size
	# a3 = data cache size
	#
	lui	t0, ((K0BASE >> 16) & 0xFFFF)
	ori	t0, t0, (K0BASE & 0xFFFF)
	addu	t1, t0, a2	# end cache address
	subu	t2, a1, 1	# line size mask
	not	t2		# invert the mask
	and	t3, t0, t2	# get start address
	addu	t1, -1
	and	t1, t2		# get end address
5:
	cache	INDEX_INVALIDATE_I,0(t3)
	bne	t3, t1, 5b
	addu	t3, a1
	#
	addu	t1, t0, a3	# end cache address
	subu	t2, a0, 1	# line size mask
	not	t2		# invert the mask
	and	t3, t0, t2	# get start address
	addu	t1, -1
	and	t1, t2		# get end address
6:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t3)
	bne	t3, t1, 6b
	addu	t3, a0
	#
	j	ra	# return to the caller
	nop
	.set	reorder
	.end	__cpu_flush

	# NOTE: This variable should *NOT* be addressed relative to
	# the $gp register since this code is executed before $gp is
	# initialised... hence we leave it in the text area. This will
	# cause problems if this routine is ever ROMmed:

	.globl	__buserr_cnt
__buserr_cnt:
	.word	0
	.align	3
__k1_save:
	.word	0
	.word	0
	.align	2

        .ent __buserr
        .globl __buserr
__buserr:
        .set noat
	.set noreorder
	# k0 and k1 available for use:
	mfc0	k0,C0_CAUSE
	nop
	nop
	andi	k0,k0,0x7c
	sub	k0,k0,7 << 2
	beq	k0,$0,__buserr_do
	nop
	# call the previous handler
	la	k0,__previous
	jr	k0
	nop
	#
__buserr_do:
	# TODO: check that the cause is indeed a bus error
	# - if not then just jump to the previous handler
	la	k0,__k1_save
	sd	k1,0(k0)
	#
        la      k1,__buserr_cnt
        lw      k0,0(k1)        # increment counter
        addu    k0,1
        sw      k0,0(k1)
	#
	la	k0,__k1_save
	ld	k1,0(k0)
	#
        mfc0    k0,C0_EPC
	nop
	nop
        addu    k0,k0,4		# skip offending instruction
	mtc0	k0,C0_EPC	# update EPC
	nop
	nop
	eret
#        j       k0
#        rfe
        .set reorder
        .set at
        .end __buserr

__exception_code:
	.set noreorder
	lui	k0,%hi(__buserr)
	daddiu	k0,k0,%lo(__buserr)
	jr	k0
	nop
	.set reorder
__exception_code_end:

	.data
__previous:
	.space	(__exception_code_end - __exception_code)
	# This subtracting two addresses is working
	# but is not garenteed to continue working.
	# The assemble reserves the right to put these
	# two labels into different frags, and then
	# cant take their difference.

	.text

	.ent	__default_buserr_handler
	.globl	__default_buserr_handler
__default_buserr_handler:
        .set noreorder
	# attach our simple bus error handler:
	# in:  void
	# out: void
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0
	daddiu	a0,a0,0x0200
baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a2,__exception_code
	subu	a1,a1,a2
	la	a3,__previous
	# there must be a better way of doing this????
copyloop:
	lw	v0,0(a0)
	sw	v0,0(a3)
	lw	v0,0(a2)
	sw	v0,0(a0)
	daddiu	a0,a0,4
	daddiu	a2,a2,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,copyloop
	nop
        la      a0,__buserr_cnt
	sw	$0,0(a0)
	j	ra
	nop
        .set reorder
	.end	__default_buserr_handler

	.ent	__restore_buserr_handler
	.globl	__restore_buserr_handler
__restore_buserr_handler:
        .set noreorder
	# restore original (monitor) bus error handler
	# in:  void
	# out: void
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,res_baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0
	daddiu	a0,a0,0x0200
res_baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a3,__exception_code
	subu	a1,a1,a3
	la	a3,__previous
	# there must be a better way of doing this????
res_copyloop:
	lw	v0,0(a3)
	sw	v0,0(a0)
	daddiu	a0,a0,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,res_copyloop
	nop
	j	ra
	nop
        .set reorder
	.end	__restore_buserr_handler

	.ent	__buserr_count
	.globl	__buserr_count
__buserr_count:
        .set noreorder
	# restore original (monitor) bus error handler
	# in:  void
	# out: unsigned int __buserr_cnt
        la      v0,__buserr_cnt
	lw	v0,0(v0)
	j	ra
	nop
        .set reorder
	.end	__buserr_count

/* EOF vr4300.S */