1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
|
/*
* vr5xxx.S -- CPU specific support routines
*
* Copyright (c) 1999 Cygnus Solutions
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* This file cloned from vr4300.S by dlindsay@cygnus.com
* and recoded to suit Vr5432 and Vr5000.
* Should be no worse for Vr43{00,05,10}.
* Specifically, __cpu_flush() has been changed (a) to allow for the hardware
* difference (in set associativity) between the Vr5432 and Vr5000,
* and (b) to flush the optional secondary cache of the Vr5000.
*/
/* Processor Revision Identifier (PRID) Register: Implementation Numbers */
/* __cpu_flush compares this value against PRID bits 8..15 to select the
 * Vr5432-specific flush path. */
#define IMPL_VR5432 0x54
/* Cache Constants not determinable dynamically */
/* Line size used by __cpu_flush when walking the Vr5000 secondary cache. */
#define VR5000_2NDLINE 32 /* secondary cache line size */
/* The Vr5432 path of __cpu_flush does not read CONFIG; it walks VR5432_SIZE
 * bytes in VR5432_LINE steps and touches set 0 and set 1 at every step. */
#define VR5432_LINE 32 /* I,Dcache line sizes */
#define VR5432_SIZE (16*1024) /* I,Dcache half-size */
/* When not building for a 64-bit MIPS target, raise the ISA level to mips3;
 * the code below uses instructions such as cache, eret, sd/ld and daddiu. */
#ifndef __mips64
.set mips3
#endif
#ifdef __mips16
/* This file contains 32 bit assembly code. */
.set nomips16
#endif
/* Presumably supplies the register names and the C0_*, CR_*, PR_*, SR_*,
 * K0BASE and cache-operation constants used below -- defined outside this file. */
#include "regs.S"
.text
.align 2
# __cpu_timer_poll: busy-wait for at least the requested number of PClock
# ticks by polling the CP0 Count register (coprocessor 0 register 9).
#
# Taken from "R4300 Preliminary RISC Processor Specification
# Revision 2.0 January 1995" page 39: "The Count
# register... increments at a constant rate... at one-half the
# PClock speed."
# We can use this fact to provide small polled delays.
.globl __cpu_timer_poll
.ent __cpu_timer_poll
__cpu_timer_poll:
.set noreorder
# in: a0 = (unsigned int) number of PClock ticks to wait for
# out: void
# clobbers: a0 (reused to hold the end count), v0 (scratch)
# The Vr4300 counter updates at half PClock, so divide by 2 to
# get counter delta:
bnezl a0, 1f # continue if delta non-zero
srl a0, a0, 1 # divide ticks by 2 {DELAY SLOT of a branch-likely: runs only when taken}
# perform a quick return to the caller:
j ra
nop # {DELAY SLOT}
1:
mfc0 v0, $9 # C0_COUNT: get current counter value
nop
nop
# We cannot just do the simple test, of adding our delta onto
# the current value (ignoring overflow) and then checking for
# equality. The counter is incrementing every two PClocks,
# which means the counter value can change between
# instructions, making it hard to sample at the exact value
# desired.
# However, we do know that our entry delta value is less than
# half the number space (since we divide by 2 on entry). This
# means we can use a difference in signs to indicate timer
# overflow.
addu a0, v0, a0 # unsigned add (ignore overflow)
# We now have our end value (which will have been
# sign-extended to fill the 64bit register value).
2:
# get current counter value:
mfc0 v0, $9 # C0_COUNT
nop
nop
# This is a wrapping 32bit subtraction; the sign of the result
# tells us whether the end value has been reached yet:
subu v0, a0, v0 # delta = (end - now)
bgtzl v0, 2b # keep polling while delta > 0; looping back is most likely
nop # {DELAY SLOT}
# We have now been delayed (in the foreground) for AT LEAST
# the required number of counter ticks.
j ra # return to caller
nop # {DELAY SLOT}
.set reorder
.end __cpu_timer_poll
# __cpu_flush: flush the processor caches to memory.
# Writes back and invalidates the primary Dcache, then the optional
# secondary cache (non-Vr5432 parts only), and invalidates the primary Icache.
# in: void
# out: void
# clobbers: a0-a3, t0-t3
.globl __cpu_flush
.ent __cpu_flush
__cpu_flush:
.set noreorder
# NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache.
# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,
# except that email from Dennis_Han@el.nec.com says that old
# versions of the Vr5432 incorrectly hard-wired this bit to 0.
# The Vr5000 has an optional direct-mapped secondary cache,
# and the SC bit correctly indicates this.
# So, for the 4300 and 5432 we want to just
# flush the primary Data and Instruction caches.
# For the 5000 it is desired to flush the secondary cache too.
# There is an operation difference worth noting.
# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,
# whereas 5432 primary caches use VA bit 0.
# This code interprets the relevant Config register bits as
# much as possible, except for the 5432.
# The code therefore has some portability.
# However, the associativity issues mean you should not just assume
# that this code works anywhere. Also, the secondary cache set
# size is hardwired, since the 5000 series does not define codes
# for variant sizes.
# Note: this version of the code flushes D$ before I$.
# It is difficult to construct a case where that matters,
# but it cannot hurt.
mfc0 a0, C0_PRID # a0 = Processor Revision register
nop # dlindsay: unclear why the nops, but
nop # vr4300.S had such so I do too.
srl a2, a0, PR_IMP # want bits 8..15
# The implementation field is 8 bits wide, so the mask must be 0xff
# (decimal 255). A mask of 0x255 would drop bits 1, 3, 5 and 7 of the
# field and keep bit 9, letting other implementation numbers (0x56, say)
# compare equal to the Vr5432 value and take the wrong flush path.
andi a2, a2, 0xff # mask: now a2 = Implementation # field
li a1, IMPL_VR5432
beq a1, a2, 8f # use Vr5432-specific flush algorithm
nop
# Non-Vr5432 version of the code.
# (The distinctions being: CONFIG is truthful about secondary cache,
# and we act as if the primary Icache and Dcache are direct mapped.)
mfc0 t0, C0_CONFIG # t0 = CONFIG register
nop
nop
li a1, 1 # a1=1, a useful constant
srl a2, t0, CR_IC # want IC field of CONFIG
andi a2, a2, 0x7 # mask: now a2= code for Icache size
add a2, a2, 12 # +12
sllv a2, a1, a2 # a2=primary instruction cache size in bytes
srl a3, t0, CR_DC # DC field of CONFIG
andi a3, a3, 0x7 # mask: now a3= code for Dcache size
add a3, a3, 12 # +12
sllv a3, a1, a3 # a3=primary data cache size in bytes
li t2, (1 << CR_IB) # t2=mask over IB boolean
and t2, t2, t0 # test IB field of CONFIG register value
beqz t2, 1f #
li a1, 16 # 16 bytes (branch shadow: always loaded.)
li a1, 32 # non-zero, then 32bytes
1:
li t2, (1 << CR_DB) # t2=mask over DB boolean
and t2, t2, t0 # test DB field of CONFIG register value
beqz t2, 2f #
li a0, 16 # 16bytes (branch shadow: always loaded.)
li a0, 32 # non-zero, then 32bytes
2:
lui t1, ((K0BASE >> 16) & 0xFFFF)
ori t1, t1, (K0BASE & 0xFFFF)
# At this point,
# a0 = primary Dcache line size in bytes
# a1 = primary Icache line size in bytes
# a2 = primary Icache size in bytes
# a3 = primary Dcache size in bytes
# t0 = CONFIG value
# t1 = a round unmapped cached base address (we are in kernel mode)
# t2,t3 scratch
addi t3, t1, 0 # t3=t1=start address for any cache
add t2, t3, a3 # t2=end address+1 of Dcache
sub t2, t2, a0 # t2=address of last line in Dcache
3:
cache INDEX_WRITEBACK_INVALIDATE_D,0(t3)
bne t3, t2, 3b # exit once the last line has been done
addu t3, a0 # (delay slot) increment by Dcache line size
# Now check CONFIG to see if there is a secondary cache
lui t2, (1 << (CR_SC-16)) # t2=mask over SC boolean
and t2, t2, t0 # test SC in CONFIG
bnez t2, 6f # SC set: no secondary cache, go do the Icache
# There is a secondary cache. Find out its sizes.
# (The srl below sits in the bnez delay slot and so executes on both
# paths; that is harmless because t3 is scratch.)
srl t3, t0, CR_SS # want SS field of CONFIG
andi t3, t3, 0x3 # mask: now t3= code for cache size.
beqz t3, 4f
lui a3, ((512*1024)>>16) # a3= 512K, code was 0
addu t3, -1 # decrement code
beqz t3, 4f
lui a3, ((1024*1024)>>16) # a3= 1 M, code 1
addu t3, -1 # decrement code
beqz t3, 4f
lui a3, ((2*1024*1024)>>16) # a3= 2 M, code 2
j 6f # no secondary cache, code 3
4: # a3 = secondary cache size in bytes
# (On the code 3 path the li below is only the delay slot of the jump
# above; a0 is not read again after label 6.)
li a0, VR5000_2NDLINE # no codes assigned for other than 32
# At this point,
# a0 = secondary cache line size in bytes
# a1 = primary Icache line size in bytes
# a2 = primary Icache size in bytes
# a3 = secondary cache size in bytes
# t1 = a round unmapped cached base address (we are in kernel mode)
# t2,t3 scratch
addi t3, t1, 0 # t3=t1=start address for any cache
add t2, t3, a3 # t2=end address+1 of secondary cache
sub t2, t2, a0 # t2=address of last line in secondary cache
5:
cache INDEX_WRITEBACK_INVALIDATE_SD,0(t3)
bne t3, t2, 5b
addu t3, a0 # (delay slot) increment by line size
6: # Any optional secondary cache done. Now do I-cache and return.
# At this point,
# a1 = primary Icache line size in bytes
# a2 = primary Icache size in bytes
# t1 = a round unmapped cached base address (we are in kernel mode)
# t2,t3 scratch
add t2, t1, a2 # t2=end address+1 of Icache
sub t2, t2, a1 # t2=address of last line in Icache
7:
cache INDEX_INVALIDATE_I,0(t1)
bne t1, t2, 7b
addu t1, a1 # (delay slot) increment by Icache line size
j ra # return to the caller
nop
8:
# Vr5432 version of the cpu_flush code.
# (The distinctions being: CONFIG can not be trusted about secondary
# cache (which does not exist). The primary caches use Virtual Address Bit 0
# to control set selection.
# Code does not consult CONFIG about cache sizes: knows the hardwired sizes.
# Since both I and D have the same size and line size, uses a merged loop.)
li a0, VR5432_LINE
li a1, VR5432_SIZE
lui t1, ((K0BASE >> 16) & 0xFFFF)
ori t1, t1, (K0BASE & 0xFFFF)
# a0 = cache line size in bytes
# a1 = 1/2 cache size in bytes
# t1 = a round unmapped cached base address (we are in kernel mode)
add t2, t1, a1 # t2=end address+1
sub t2, t2, a0 # t2=address of the last line to visit
9:
cache INDEX_WRITEBACK_INVALIDATE_D,0(t1) # set 0
cache INDEX_WRITEBACK_INVALIDATE_D,1(t1) # set 1
cache INDEX_INVALIDATE_I,0(t1) # set 0
cache INDEX_INVALIDATE_I,1(t1) # set 1
bne t1, t2, 9b
addu t1, a0 # (delay slot) increment by line size
j ra # return to the caller
nop
.set reorder
.end __cpu_flush
# __buserr_cnt: 32-bit counter incremented by __buserr, zeroed by
# __default_buserr_handler and returned by __buserr_count.
# NOTE: This variable should *NOT* be addressed relative to
# the $gp register since this code is executed before $gp is
# initialised... hence we leave it in the text area. This will
# cause problems if this routine is ever ROMmed:
.globl __buserr_cnt
__buserr_cnt:
.word 0
# __k1_save: two-word scratch slot where __buserr parks k1 with sd/ld;
# the .align 3 gives the 8-byte alignment that a doubleword access needs.
.align 3
__k1_save:
.word 0
.word 0
.align 2
# __buserr: exception handler body, reached through the __exception_code
# stub once __default_buserr_handler has copied that stub over the vector.
# When Cause.ExcCode is 7 it increments __buserr_cnt, advances EPC by one
# instruction and returns with eret; for any other code it jumps to the
# words saved at __previous.
# in: exception context (reads C0_CAUSE and C0_EPC)
# out: no normal return; only k0 and k1 are used, and k1 is restored on
# the ExcCode 7 path
.ent __buserr
.globl __buserr
__buserr:
.set noat
.set noreorder
# k0 and k1 available for use:
mfc0 k0,C0_CAUSE
nop
nop
andi k0,k0,0x7c # keep Cause bits 2..6 (ExcCode, still shifted left by 2)
sub k0,k0,7 << 2 # zero only when ExcCode == 7
beq k0,$0,__buserr_do
nop
# call the previous handler
la k0,__previous
jr k0
nop
#
__buserr_do:
# ExcCode was 7 here (tested above); every other cause has already
# been handed to the previous handler.
# NOTE(review): 7 is presumably the data bus error code -- confirm
# against the CPU manual.
# Save k1 so that it can be used as a pointer:
la k0,__k1_save
sd k1,0(k0)
#
la k1,__buserr_cnt
lw k0,0(k1) # increment counter
addu k0,1
sw k0,0(k1)
#
# Restore the interrupted value of k1:
la k0,__k1_save
ld k1,0(k0)
#
mfc0 k0,C0_EPC
nop
nop
# NOTE(review): adding 4 assumes the faulting instruction was not in a
# branch delay slot (Cause.BD is not examined) -- confirm this is acceptable.
addu k0,k0,4 # skip offending instruction
mtc0 k0,C0_EPC # update EPC
nop
nop
eret
# j k0
# rfe
.set reorder
.set at
.end __buserr
# __exception_code .. __exception_code_end: four-instruction stub that
# __default_buserr_handler copies over the exception vector. It builds the
# absolute address of __buserr in k0 and jumps there, so it does not depend
# on the address it is executed from.
__exception_code:
.set noreorder
lui k0,%hi(__buserr)
daddiu k0,k0,%lo(__buserr)
jr k0
nop # {DELAY SLOT}
.set reorder
__exception_code_end:
.data
# __previous: buffer, the same size as the __exception_code stub, that
# receives the words __default_buserr_handler overwrites at the exception
# vector. __buserr jumps here for exceptions it does not handle, and
# __restore_buserr_handler copies it back over the vector.
__previous:
.space (__exception_code_end - __exception_code)
# This subtraction of two addresses works today
# but is not guaranteed to continue working.
# The assembler reserves the right to put these
# two labels into different frags, and then
# cannot take their difference.
.text
# __default_buserr_handler: install the __exception_code stub over the
# general exception vector, saving the words it replaces in __previous,
# and reset __buserr_cnt to zero.
.ent __default_buserr_handler
.globl __default_buserr_handler
__default_buserr_handler:
.set noreorder
# attach our simple bus error handler:
# in: void
# out: void
# clobbers: a0-a3, v0
mfc0 a0,C0_SR
nop
li a1,SR_BEV
and a1,a1,a0
beq a1,$0,baseaddr # BEV clear: vector base is 0x80000000
lui a0,0x8000 # delay slot
lui a0,0xbfc0 # BEV set: vector base is 0xbfc00200
daddiu a0,a0,0x0200
baseaddr:
daddiu a0,a0,0x0180 # vector sits at base + 0x180
# a0 = base vector table address
la a1,__exception_code_end
la a2,__exception_code
subu a1,a1,a2 # a1 = stub size in bytes
la a3,__previous
# there must be a better way of doing this????
# Word-by-word: save the old vector word into __previous, then overwrite
# it with the matching stub word. The loop runs until a1 reaches zero,
# so the stub size must be a non-zero multiple of 4.
# NOTE(review): no cache flush is done here after rewriting the vector;
# presumably the caller is expected to call __cpu_flush -- confirm.
copyloop:
lw v0,0(a0)
sw v0,0(a3)
lw v0,0(a2)
sw v0,0(a0)
daddiu a0,a0,4
daddiu a2,a2,4
daddiu a3,a3,4
subu a1,a1,4
bne a1,$0,copyloop
nop
la a0,__buserr_cnt
sw $0,0(a0) # start counting from zero
j ra
nop
.set reorder
.end __default_buserr_handler
# __restore_buserr_handler: copy the words saved in __previous back over
# the general exception vector, undoing __default_buserr_handler.
.ent __restore_buserr_handler
.globl __restore_buserr_handler
__restore_buserr_handler:
.set noreorder
# restore original (monitor) bus error handler
# in: void
# out: void
# clobbers: a0, a1, a3, v0
mfc0 a0,C0_SR
nop
li a1,SR_BEV
and a1,a1,a0
beq a1,$0,res_baseaddr # BEV clear: vector base is 0x80000000
lui a0,0x8000 # delay slot
lui a0,0xbfc0 # BEV set: vector base is 0xbfc00200
daddiu a0,a0,0x0200
res_baseaddr:
daddiu a0,a0,0x0180 # vector sits at base + 0x180
# a0 = base vector table address
la a1,__exception_code_end
la a3,__exception_code
subu a1,a1,a3 # a1 = number of bytes to copy back
la a3,__previous # a3 is reused as the source pointer
# there must be a better way of doing this????
# NOTE(review): assumes __default_buserr_handler ran earlier so that
# __previous holds the saved vector words -- nothing here checks that.
res_copyloop:
lw v0,0(a3)
sw v0,0(a0)
daddiu a0,a0,4
daddiu a3,a3,4
subu a1,a1,4
bne a1,$0,res_copyloop
nop
j ra
nop
.set reorder
.end __restore_buserr_handler
# __buserr_count: return the current value of the __buserr_cnt counter.
.ent __buserr_count
.globl __buserr_count
__buserr_count:
.set noreorder
# read the bus error counter
# in: void
# out: unsigned int __buserr_cnt
la v0,__buserr_cnt
lw v0,0(v0) # v0 = counter value (v0 doubles as the pointer)
j ra
nop
.set reorder
.end __buserr_count
/* EOF vr5xxx.S */
|