aboutsummaryrefslogtreecommitdiff
path: root/asm/head.S
blob: 803fbf1a61ee58f291c4df8f9f69e1c3ad73c95f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
/* Copyright 2013-2014 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <asm-utils.h>
#include <asm-offsets.h>
#include <mem-map.h>
#include <processor.h>
#include <opal-api.h>
#include <stack.h>

/* Magic value handed to the kernel in r6 by start_kernel (ASCII "ePAP") */
#define EPAPR_MAGIC	0x65504150

/* Power management instructions */
/* These are emitted as raw 32-bit opcode words rather than mnemonics */
#define PPC_INST_NAP		.long 0x4c000364
#define PPC_INST_SLEEP		.long 0x4c0003a4
#define PPC_INST_RVWINKLE	.long 0x4c0003e4

#define PPC_INST_STOP		.long 0x4c0002e4

/*
 * GET_STACK: stack_reg = (pir_reg << STACK_SHIFT) + CPU_STACKS_OFFSET
 *
 * stack_reg and pir_reg may be the same register. Only stack_reg is
 * written.
 */
#define GET_STACK(stack_reg,pir_reg)					\
	sldi	stack_reg,pir_reg,STACK_SHIFT;				\
	addis	stack_reg,stack_reg,CPU_STACKS_OFFSET@ha;		\
	addi	stack_reg,stack_reg,CPU_STACKS_OFFSET@l;

/*
 * GET_EMERGENCY_STACK: same computation as GET_STACK but based at
 * EMERGENCY_CPU_STACKS_OFFSET. Used by opal_entry when OPAL has been
 * re-entered on the same CPU.
 */
#define GET_EMERGENCY_STACK(stack_reg,pir_reg)				\
	sldi	stack_reg,pir_reg,STACK_SHIFT;				\
	addis	stack_reg,stack_reg,EMERGENCY_CPU_STACKS_OFFSET@ha;	\
	addi	stack_reg,stack_reg,EMERGENCY_CPU_STACKS_OFFSET@l;

/*
 * GET_CPU: r13 = r1 with the low STACK_SHIFT bits cleared, i.e. the
 * base of the stack area r1 currently points into. The code in this
 * file uses r13 as the per-cpu (struct cpu_thread) pointer.
 */
#define GET_CPU()							\
	clrrdi	%r13,%r1,STACK_SHIFT

/* Store / load GPR number "reg" to / from its STACK_GPRn slot at (sp) */
#define SAVE_GPR(reg,sp)	std %r##reg,STACK_GPR##reg(sp)
#define REST_GPR(reg,sp)	ld %r##reg,STACK_GPR##reg(sp)

	.section ".head","ax"

	/*
	 * __head is offset 0 of the image. Code throughout this file uses
	 * "symbol - __head" to form image-relative offsets that are then
	 * added to a runtime base address.
	 */
	. = 0
.global __head
__head:
	/*
	 * When booting a P7 machine in OPAL mode this pointer is used to
	 * find the opal variant of the NACA. Unused on other machines.
	 */
	.llong	opal_naca

	/* This entry point is used when booting with a flat device-tree
	 * pointer in r3
	 *
	 * The pointer is moved to r27, which the boot code uses as the
	 * "DTB pointer (or NULL)" register, before joining the common
	 * boot path.
	 */
	. = 0x10
.global fdt_entry
fdt_entry:
	mr	%r27,%r3
	b	boot_entry

	/* This is a pointer to a descriptor used by debugging tools
	 * on the service processor to get to various trace buffers
	 */
	. = 0x80
	.llong	debug_descriptor

	/* This is our boot semaphore used for CPUs to sync, it has to be
	 * at an easy to locate address (without relocation) since we
	 * need to get at it very early, before we apply our relocs
	 *
	 * It is a 32-bit counter at offset 0xf0, atomically incremented
	 * in boot_entry; the CPU that reads 0 becomes the boot CPU.
	 */
	. = 0xf0
boot_sem:
	.long	0

	/* And this is a boot flag used to kick secondaries into the
	 * main code.
	 *
	 * 32-bit word immediately following boot_sem (offset 0xf4).
	 * Written to 1 by the boot CPU, polled in secondary_wait.
	 */
boot_flag:
	.long	0

	/* This is used to trigger an assert() and in turn an ATTN
	 * in skiboot when a special sequence is written at this
	 * address. For testing purposes only.
	 */
	. = 0xf8
.global attn_trigger
attn_trigger:
	.long	0

	/* This is the host initiated reset trigger for test */
	. = 0xfc
.global hir_trigger
hir_trigger:
	.long	0

	/*
	 * At 0x100 and 0x180 reside our entry points. Once started,
	 * we will overwrite them with our actual 0x100 exception handler
	 * used for recovering from rvw or nap mode
	 */
	. = 0x100
	/* BML entry, load up r3 with device tree location */
	/* r3 = 0x000a0000, then enter as if booted with an FDT in r3 */
	li	%r3, 0
	oris	%r3, %r3, 0xa
	b	fdt_entry /* hack for lab boot */

	/* Entry point set by the FSP */
	/* r27 = 0: no device-tree pointer is passed on this path */
	.= 0x180
hdat_entry:
	li	%r27,0
	b	boot_entry

/*
 * EXCEPTION(nr): emit an exception stub at image offset "nr".
 *
 * The stub stashes the interrupted r3 in SPRG0 and r4 in SPRG1, then
 * enters the common handler _exception with:
 *
 *   r3 = CFAR value read at stub entry
 *   r4 = nr (the vector offset, used as the exception type)
 *
 * Each stub is five instructions long; the closest vectors below are
 * 0x20 bytes apart.
 */
#define EXCEPTION(nr)		\
	.= nr			;\
	mtsprg0	%r3		;\
	mfspr	%r3,SPR_CFAR	;\
	mtsprg1 %r4		;\
	li	%r4,nr		;\
	b	_exception

	/* More exception stubs */
	EXCEPTION(0x200)
	EXCEPTION(0x300)
	EXCEPTION(0x380)
	EXCEPTION(0x400)
	EXCEPTION(0x480)
	EXCEPTION(0x500)
	EXCEPTION(0x600)
	EXCEPTION(0x700)
	EXCEPTION(0x800)
	EXCEPTION(0x900)
	EXCEPTION(0x980)
	EXCEPTION(0xa00)
	EXCEPTION(0xb00)
	EXCEPTION(0xc00)
	EXCEPTION(0xd00)
	EXCEPTION(0xe00)
	EXCEPTION(0xe20)
	EXCEPTION(0xe40)
	EXCEPTION(0xe60)
	EXCEPTION(0xe80)
	EXCEPTION(0xf00)
	EXCEPTION(0xf20)
	EXCEPTION(0xf40)
	EXCEPTION(0xf60)
	EXCEPTION(0xf80)
	EXCEPTION(0x1000)
	EXCEPTION(0x1100)
	EXCEPTION(0x1200)
	EXCEPTION(0x1300)
	EXCEPTION(0x1400)
	EXCEPTION(0x1500)
	EXCEPTION(0x1600)

	/*
	 * Common exception entry, reached from the EXCEPTION() stubs.
	 *
	 * On entry:
	 *   r3    = CFAR captured by the stub
	 *   r4    = vector offset (exception type)
	 *   SPRG0 = interrupted r3
	 *   SPRG1 = interrupted r4
	 *
	 * A STACK_FRAMESIZE frame is pushed on whatever r1 was in use when
	 * the exception hit, the register state is saved into it, and
	 * exception_entry is called with r3 = pointer to that frame.
	 */
	.= 0x1e00
_exception:
	stdu	%r1,-STACK_FRAMESIZE(%r1)
	std	%r3,STACK_CFAR(%r1)
	std	%r4,STACK_TYPE(%r1)
	/* Recover the interrupted r3/r4 so they are saved with the rest */
	mfsprg0	%r3
	mfsprg1 %r4
	SAVE_GPR(0,%r1)
	/* Note: this saves r1 after the stdu above, i.e. the new frame
	 * address; the interrupted r1 is in the back chain at 0(r1).
	 */
	SAVE_GPR(1,%r1)
	SAVE_GPR(2,%r1)
	SAVE_GPR(3,%r1)
	SAVE_GPR(4,%r1)
	SAVE_GPR(5,%r1)
	SAVE_GPR(6,%r1)
	SAVE_GPR(7,%r1)
	SAVE_GPR(8,%r1)
	SAVE_GPR(9,%r1)
	SAVE_GPR(10,%r1)
	SAVE_GPR(11,%r1)
	SAVE_GPR(12,%r1)
	SAVE_GPR(13,%r1)
	SAVE_GPR(14,%r1)
	SAVE_GPR(15,%r1)
	SAVE_GPR(16,%r1)
	SAVE_GPR(17,%r1)
	SAVE_GPR(18,%r1)
	SAVE_GPR(19,%r1)
	SAVE_GPR(20,%r1)
	SAVE_GPR(21,%r1)
	SAVE_GPR(22,%r1)
	SAVE_GPR(23,%r1)
	SAVE_GPR(24,%r1)
	SAVE_GPR(25,%r1)
	SAVE_GPR(26,%r1)
	SAVE_GPR(27,%r1)
	SAVE_GPR(28,%r1)
	SAVE_GPR(29,%r1)
	SAVE_GPR(30,%r1)
	SAVE_GPR(31,%r1)
	/* GPRs are saved, r3..r6 are now free to shuttle SPRs to the frame */
	mfcr	%r3
	mfxer	%r4
	mfctr	%r5
	mflr	%r6
	stw	%r3,STACK_CR(%r1)
	stw	%r4,STACK_XER(%r1)
	std	%r5,STACK_CTR(%r1)
	std	%r6,STACK_LR(%r1)
	mfspr	%r3,SPR_SRR0
	mfspr	%r4,SPR_SRR1
	mfspr	%r5,SPR_HSRR0
	mfspr	%r6,SPR_HSRR1
	std	%r3,STACK_SRR0(%r1)
	std	%r4,STACK_SRR1(%r1)
	std	%r5,STACK_HSRR0(%r1)
	std	%r6,STACK_HSRR1(%r1)
	mfspr	%r3,SPR_DSISR
	mfspr	%r4,SPR_DAR
	stw	%r3,STACK_DSISR(%r1)
	std	%r4,STACK_DAR(%r1)
	/* r3 = frame pointer, the argument to exception_entry */
	mr	%r3,%r1
	/*
	 * Branch via an address computed from SKIBOOT_BASE rather than a
	 * direct relative branch.
	 * NOTE(review): presumably because this vector code can execute
	 * from a copy that is not at SKIBOOT_BASE - confirm.
	 */
	LOAD_IMM64(%r4, SKIBOOT_BASE)
	LOAD_IMM32(%r5, exception_entry_foo - __head)
	add	%r4,%r4,%r5
	mtctr	%r4
	bctrl
	/* Spin here if exception_entry ever returns */
	b	.
exception_entry_foo:
	b	exception_entry

	.= EXCEPTION_VECTORS_END
	/* This is the OPAL branch table. It's populated at boot time
	 * with function pointers to the various OPAL functions from
	 * the content of the .opal_table section, indexed by Token.
	 *
	 * One 8-byte slot per token, tokens 0..OPAL_LAST inclusive.
	 * opal_entry indexes it with (token << 3).
	 */
.global opal_branch_table
opal_branch_table:
	.space	8 * (OPAL_LAST + 1)

/* Stores the offset we were started from.  Used later on if we want to
 * read any unrelocated code/data such as the built-in kernel image
 *
 * Written once by boot_entry with the runtime address of __head as
 * computed before any copy/relocation.
 */
.global boot_offset
boot_offset:
        .llong   0

/*
 *
 * Boot time entry point from FSP
 *
 * All CPUs come here
 *
 * Boot code NV register usage:
 *
 *   r31 :  Boot PIR
 *   r30 :  Current running offset
 *   r29 :  Target address
 *   r28 :  PVR
 *   r27 :  DTB pointer (or NULL)
 *   r26 :  PIR thread mask
 */
.global boot_entry
boot_entry:
	/* Check PVR and pick the PIR thread mask (r26) for this CPU type:
	 * 3 for P7/P7+/P9, 7 for the P8 family. Unknown types stop here.
	 */
	mfspr	%r28,SPR_PVR
	li	%r26,3	/* Default to SMT4 */
	srdi	%r3,%r28,16
	cmpwi	cr0,%r3,PVR_TYPE_P7
	beq	1f
	cmpwi	cr0,%r3,PVR_TYPE_P7P
	beq	1f
	cmpwi	cr0,%r3,PVR_TYPE_P8
	beq	2f
	cmpwi	cr0,%r3,PVR_TYPE_P8E
	beq	2f
	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
	beq	2f
	cmpwi	cr0,%r3,PVR_TYPE_P9
	beq 	1f
	attn		/* Unsupported CPU type... what do we do ? */
	b 	.	/* loop here, just in case attn is disabled */

	/* P8 -> 8 threads */
2:	li	%r26,7

	/* Get our reloc offset into r30 */
	/* bcl to the next instruction puts that instruction's runtime
	 * address in LR; subtracting its image offset gives the runtime
	 * address of __head.
	 */
1:	bcl	20,31,$+4
1:	mflr	%r30
	subi	%r30,%r30,(1b - __head)

	/* Store reloc offset in boot_offset */
	/* (written to boot_offset at its current, pre-copy location) */
	LOAD_IMM32(%r3, boot_offset - __head)
	add     %r3,%r3,%r30
	std     %r30,0(%r3)

	/* Get ourselves a TOC & relocate it to our target address */
	/* r29 = SKIBOOT_BASE (target address), r2 = SKIBOOT_BASE + TOC offset */
	LOAD_IMM32(%r2,__toc_start - __head)
	LOAD_IMM64(%r29, SKIBOOT_BASE)
	add	%r2,%r2,%r29

	/* Fixup our MSR (remove TA) */
	/* MSR is set to exactly HV | SF; every other bit is cleared */
	LOAD_IMM64(%r3, (MSR_HV | MSR_SF))
	mtmsrd	%r3,0

	/* Check our PIR, avoid threads */
	/* Any thread with non-zero thread bits (PIR & r26) goes straight
	 * to the secondary wait loop.
	 */
	mfspr	%r31,SPR_PIR
	and.	%r0,%r31,%r26
	bne	secondary_wait

	/* Initialize per-core SPRs */
	bl	init_shared_sprs

	/* Pick a boot CPU, cpu index in r31 */
	/* Atomic increment of boot_sem; r4 holds the value seen before the
	 * increment. Only the CPU that saw 0 continues as boot CPU.
	 */
	LOAD_IMM32(%r3, boot_sem - __head)
	add	%r3,%r3,%r30
1:	lwarx	%r4,0,%r3
	addi	%r0,%r4,1
	stwcx.	%r0,0,%r3
	bne	1b
	isync
	cmpwi	cr0,%r4,0
	bne	secondary_wait

	/* Make sure we are in SMT medium */
	smt_medium

	/* Initialize thread SPRs */
	bl	init_replicated_sprs

	/* Save the initial offset. The secondary threads will spin on boot_flag
	 * before relocation so we need to keep track of its location to wake
	 * them up.
	 */
	mr	%r15,%r30

	/* Check if we need to copy ourselves up and update %r30 to
	 * be our new offset
	 */
	cmpd	%r29,%r30
	beq	2f
	/* Copy (_sbss - __head) bytes, 8 at a time, from the current
	 * location (r4) to SKIBOOT_BASE (r29). r29 is advanced by the
	 * loop, so after it only r30 holds the new base on this CPU.
	 */
	LOAD_IMM32(%r3, _sbss - __head)
	srdi	%r3,%r3,3
	mtctr	%r3
	mr	%r4,%r30
	mr	%r30,%r29
	/* copy the skiboot image to the new offset */
1:	ld	%r0,0(%r4)
	std	%r0,0(%r29)
	addi	%r29,%r29,8
	addi	%r4,%r4,8
	bdnz	1b
	/* flush caches, etc */
	sync
	icbi	0,%r29
	sync
	isync
	/* branch to the new image location and continue */
	LOAD_IMM32(%r3, 2f - __head)
	add	%r3,%r3,%r30
	mtctr	%r3
	bctr

	/* Get ready for C code: get a stack */
2:	GET_STACK(%r1,%r31)

	/* Clear up initial frame.
	 * Zero back chain indicates stack entry from boot,
	 * non-zero indicates entry from OS (see backtrace code).
	 */
	li	%r3,0
	std	%r3,0(%r1)
	std	%r3,8(%r1)
	std	%r3,16(%r1)

	/* Relocate ourselves */
	bl	call_relocate

	/* Tell secondaries to move to second stage (relocated) spin loop */
	/* r15 is the pre-copy offset: the flag is set in the original
	 * image, which is where the secondaries are polling it.
	 */
	LOAD_IMM32(%r3, boot_flag - __head)
	add	%r3,%r3,%r15
	li	%r0,1
	stw	%r0,0(%r3)

	/* Clear BSS */
	/* NOTE(review): the loop count is (_ebss - _sbss) / 8, so this
	 * assumes a non-empty BSS whose size is a multiple of 8 - confirm
	 * against the linker script.
	 */
	li	%r0,0
	LOAD_ADDR_FROM_TOC(%r3, _sbss)
	LOAD_ADDR_FROM_TOC(%r4, _ebss)
	subf	%r4,%r3,%r4
	srdi	%r4,%r4,3
	mtctr	%r4
1:	std	%r0,0(%r3)
	addi	%r3,%r3,8
	bdnz	1b

	/* Get our per-cpu pointer into r13 */
	GET_CPU()

#ifdef STACK_CHECK_ENABLED
	/* Initialize stack bottom mark to 0, it will be updated in C code */
	li	%r0,0
	std	%r0,CPUTHREAD_STACK_BOT_MARK(%r13)
#endif
	/* Initialize the stack guard */
	/* Guard value is STACK_CHECK_GUARD_BASE ^ PIR, stored at offset 0
	 * from r13.
	 */
	LOAD_IMM64(%r3,STACK_CHECK_GUARD_BASE);
	xor	%r3,%r3,%r31
	std	%r3,0(%r13)

	/* Jump to C */
	/* r3 = DTB pointer as recorded in r27 by the entry stub */
	mr	%r3,%r27
	bl	main_cpu_entry
	b	.

	/* Secondary CPUs wait here r31 is PIR */
secondary_wait:	
	/* The primary might be in the middle of relocating us,
	 * so first we spin on the boot_flag
	 */
	/* r30 is still this thread's pre-copy offset here, so the flag is
	 * polled in the original image.
	 */
	LOAD_IMM32(%r3, boot_flag - __head)
	add	%r3,%r3,%r30
1:	smt_lowest
	lwz	%r0,0(%r3)
	cmpdi	%r0,0
	beq	1b

	/* Init some registers */
	bl init_replicated_sprs

	/* Switch to new runtime address */
	/* r29 was loaded with SKIBOOT_BASE in boot_entry and is untouched
	 * on this path; continue at label 1 inside the image at that base.
	 */
	mr	%r30,%r29
	LOAD_IMM32(%r3, 1f - __head)
	add	%r3,%r3,%r30
	mtctr	%r3
	isync
	bctr
1:
	/* Now wait for cpu_secondary_start to be set */
	LOAD_ADDR_FROM_TOC(%r3, cpu_secondary_start)
1:	smt_lowest
	ld	%r0,0(%r3)
	cmpdi	%r0,0
	beq	1b

	smt_medium

	/* Check our PIR is in bound */
	/* Signed 32-bit compare of PIR against cpu_max_pir */
	LOAD_ADDR_FROM_TOC(%r5, cpu_max_pir)
	lwz	%r5,0(%r5)
	cmpw	%r31,%r5
	bgt-	secondary_not_found

	/* Get our stack, cpu thread, and jump to C */
	/* Back chain (0) and LR save slot (16) of the initial frame are
	 * zeroed before entering C.
	 */
	GET_STACK(%r1,%r31)
	li	%r0,0
	std	%r0,0(%r1)
	std	%r0,16(%r1)
	GET_CPU()

	bl	secondary_cpu_entry
	b	.

	/* Not found... what to do ? set some global error ? */
	/* For now: drop to lowest SMT priority and spin forever */
secondary_not_found:
	smt_lowest
	b	.

/*
 * call_relocate: apply the image's dynamic relocations by calling the C
 * function relocate().
 *
 * In:       r30 = current runtime base of the image
 * Calls:    relocate(r3 = base,
 *                    r4 = base + offset of __dynamic_start,
 *                    r5 = base + offset of __rela_dyn_start)
 * Out:      returns to the caller only if relocate() returned 0
 * Clobbers: r14 (used to hold the return address across the call), plus
 *           whatever relocate() clobbers
 *
 * A non-zero return from relocate() is fatal: we raise an attention and,
 * in case attn is disabled, spin in place rather than falling through
 * into whatever code follows this routine.
 */
call_relocate:
	mflr	%r14
	LOAD_IMM32(%r4,__dynamic_start - __head)
	LOAD_IMM32(%r5,__rela_dyn_start - __head)
	add	%r4,%r4,%r30
	add	%r5,%r5,%r30
	mr	%r3,%r30
	bl	relocate
	cmpwi	%r3,0
	bne	1f
	mtlr	%r14
	blr
1:	/* Fatal relocate failure */
	attn
	b	.	/* loop here, just in case attn is disabled */

/*
 * FIXUP_ENDIAN: endian-switch trampoline.
 *
 * The first word decodes as a tdi instruction in the expected endian
 * and, byte-reversed, as a branch over the next instruction, so the
 * same bytes steer execution either past the trampoline or into it.
 * The trampoline itself is written as byte-reversed opcodes (see the
 * per-word comments): it computes the address just past itself, flips
 * MSR bit 0 (xori with 1) and rfid's there.
 *
 * Clobbers r10 and r11 when the trampoline runs.
 */
#define FIXUP_ENDIAN                                              \
       tdi   0,0,0x48;   /* Reverse endian of b . + 8          */ \
       b     $+36;       /* Skip trampoline if endian is good  */ \
       .long 0x05009f42; /* bcl 20,31,$+4                      */ \
       .long 0xa602487d; /* mflr r10                           */ \
       .long 0x1c004a39; /* addi r10,r10,28                    */ \
       .long 0xa600607d; /* mfmsr r11                          */ \
       .long 0x01006b69; /* xori r11,r11,1                     */ \
       .long 0xa6035a7d; /* mtsrr0 r10                         */ \
       .long 0xa6037b7d; /* mtsrr1 r11                         */ \
       .long 0x2400004c  /* rfid                               */

/*
 * enable_machine_check: set MSR[ME] on the calling thread.
 *
 * The new MSR value (current MSR | MSR_ME) is installed with hrfid,
 * resuming at the instruction right after it.
 *
 * Clobbers: r0, r3, HSRR0, HSRR1. LR is preserved via r0.
 */
.global enable_machine_check
enable_machine_check:
	mflr	%r0			/* bcl below overwrites LR */
	bcl	20,31,.Lemc_here	/* LR = address of .Lemc_here */
.Lemc_here:
	mflr	%r3
	addi	%r3,%r3,(.Lemc_resume - .Lemc_here)
	mtspr	SPR_HSRR0,%r3		/* resume address */
	mfmsr	%r3
	ori	%r3,%r3,MSR_ME
	mtspr	SPR_HSRR1,%r3		/* resume MSR, ME set */
	hrfid
.Lemc_resume:
	mtlr	%r0
	blr

/*
 * disable_machine_check: clear MSR[ME] on the calling thread.
 *
 * The new MSR value (current MSR & ~MSR_ME) is installed with hrfid,
 * resuming at the instruction right after it.
 *
 * Clobbers: r0, r3, r4, HSRR0, HSRR1. LR is preserved via r0.
 */
.global disable_machine_check
disable_machine_check:
	mflr	%r0			/* bcl below overwrites LR */
	bcl	20,31,.Ldmc_here	/* LR = address of .Ldmc_here */
.Ldmc_here:
	mflr	%r3
	addi	%r3,%r3,(.Ldmc_resume - .Ldmc_here)
	mtspr	SPR_HSRR0,%r3		/* resume address */
	mfmsr	%r3
	li	%r4,MSR_ME
	andc	%r3,%r3,%r4
	mtspr	SPR_HSRR1,%r3		/* resume MSR, ME clear */
	hrfid
.Ldmc_resume:
	mtlr	%r0
	blr

/*
 * pm_save_regs: save the state that must survive a power-management
 * state into the stack frame at r1.
 *
 * Saved: CR, XER, HSPRG0 (in the GPR0 slot), HSPRG1 (in the GPR1 slot),
 *        r2 and r14..r31.
 * Clobbers: r4, r5, r6, r7 (left holding CR, XER, HSPRG0, HSPRG1).
 * r3 is not touched; callers rely on it after the call.
 */
pm_save_regs:
	/* Special registers first, one read/store pair at a time */
	mfcr	%r4
	stw	%r4,STACK_CR(%r1)
	mfxer	%r5
	stw	%r5,STACK_XER(%r1)
	mfspr	%r6,SPR_HSPRG0
	std	%r6,STACK_GPR0(%r1)
	mfspr	%r7,SPR_HSPRG1
	std	%r7,STACK_GPR1(%r1)

	/* TOC pointer and non-volatile GPRs */
	SAVE_GPR(2,%r1)
	SAVE_GPR(14,%r1)
	SAVE_GPR(15,%r1)
	SAVE_GPR(16,%r1)
	SAVE_GPR(17,%r1)
	SAVE_GPR(18,%r1)
	SAVE_GPR(19,%r1)
	SAVE_GPR(20,%r1)
	SAVE_GPR(21,%r1)
	SAVE_GPR(22,%r1)
	SAVE_GPR(23,%r1)
	SAVE_GPR(24,%r1)
	SAVE_GPR(25,%r1)
	SAVE_GPR(26,%r1)
	SAVE_GPR(27,%r1)
	SAVE_GPR(28,%r1)
	SAVE_GPR(29,%r1)
	SAVE_GPR(30,%r1)
	SAVE_GPR(31,%r1)
	blr

.global enter_p8_pm_state
enter_p8_pm_state:
	/* Before entering nap or rvwinkle, we create a stack frame
	 * and save our non-volatile registers.
	 *
	 * We also save these SPRs:
	 *
	 *  - HSPRG0	in GPR0 slot
	 *  - HSPRG1	in GPR1 slot
	 *
	 *  - xxx TODO: HIDs
	 *  - TODO: Mask MSR:ME during the process
	 *
	 * On entry, r3 indicates:
	 *
	 *    0 = nap
	 *    1 = rvwinkle
	 *
	 * This routine does not return directly: after the power-saving
	 * instruction it spins, and execution resumes through reset_wakeup,
	 * which restores the frame saved here and returns to our caller.
	 */
	mflr	%r0
	std	%r0,16(%r1)
	stdu	%r1,-STACK_FRAMESIZE(%r1)

	bl	pm_save_regs

	/* Save stack pointer in struct cpu_thread */
	std	%r1,CPUTHREAD_SAVE_R1(%r13)

	/* Winkle or nap ? */
	/* 32-bit compare: r3 == 0 selects nap, anything else rvwinkle */
	cmpli	%cr0,0,%r3,0
	bne	1f

	/* nap sequence */
	/* Load back the value just stored and compare it with itself; the
	 * branch is never taken.
	 * NOTE(review): presumably the architected sequence that makes the
	 * store complete before the power-saving instruction - confirm
	 * against the ISA.
	 */
	ptesync
0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
	cmpd	cr0,%r0,%r0
	bne	0b
	PPC_INST_NAP
	b	.

	/* rvwinkle sequence */
	/* Same load/compare sequence as for nap */
1:	ptesync
0:	ld	%r0,CPUTHREAD_SAVE_R1(%r13)
	cmpd	cr0,%r0,%r0
	bne	0b
	PPC_INST_RVWINKLE
	b	.

/*
 * enter_p9_pm_lite_state: write r3 to PSSCR and execute stop.
 *
 * No state is saved; execution continues at the blr after the stop
 * instruction, returning to the caller.
 */
.global enter_p9_pm_lite_state
enter_p9_pm_lite_state:
	mtspr	SPR_PSSCR,%r3
	PPC_INST_STOP
	blr

/*
 * enter_p9_pm_state: save state, write r3 to PSSCR and execute stop.
 *
 * A stack frame is created and filled by pm_save_regs, and its address
 * is recorded in CPUTHREAD_SAVE_R1 so that reset_wakeup can restore it
 * and return to our caller. The code after the stop instruction only
 * spins.
 */
.global enter_p9_pm_state
enter_p9_pm_state:
	mflr	%r0
	std	%r0,16(%r1)
	stdu	%r1,-STACK_FRAMESIZE(%r1)

	bl	pm_save_regs

	/* Save stack pointer in struct cpu_thread */
	std	%r1,CPUTHREAD_SAVE_R1(%r13)

	/* r3 is still the caller's value: pm_save_regs does not touch it */
	mtspr	SPR_PSSCR,%r3
	PPC_INST_STOP
	b	.

/* This is a little piece of code that is copied down to
 * 0x100 for handling power management wakeups
 */
/*
 * The bytes between reset_patch_start and reset_patch_end are the patch.
 * It is position independent: it only uses absolute values
 * (SKIBOOT_BASE plus the image offset of reset_wakeup) and leaves
 * r30 = SKIBOOT_BASE for the code it jumps to.
 */
.global reset_patch_start
reset_patch_start:
	FIXUP_ENDIAN
	smt_medium
	LOAD_IMM64(%r30, SKIBOOT_BASE)
	LOAD_IMM32(%r3, reset_wakeup - __head)
	add	%r3,%r30,%r3
	mtctr	%r3
	bctr
.global reset_patch_end
reset_patch_end:

/*
 * reset_wakeup: reached from the reset patch above with
 * r30 = SKIBOOT_BASE.
 *
 * If this thread recorded a stack pointer in CPUTHREAD_SAVE_R1 (see
 * enter_p8_pm_state / enter_p9_pm_state), restore the state saved by
 * pm_save_regs and return to the caller of that routine. A saved value
 * of 0 means fast reboot instead.
 */
reset_wakeup:
	/* Get PIR */
	mfspr	%r31,SPR_PIR

	/* Get that CPU stack base and use it to restore r13 */
	GET_STACK(%r1,%r31)
	GET_CPU()

	/* Restore original stack pointer */
	ld	%r3,CPUTHREAD_SAVE_R1(%r13)

	/* If it's 0, we are doing a fast reboot */
	/* (r1 is left at the stack base computed above in that case) */
	cmpldi	%r3,0
	beq	fast_reset_entry
	mr	%r1,%r3

	/* Restore more stuff */
	/* HSPRG0/HSPRG1 were stored in the GPR0/GPR1 slots */
	lwz	%r3,STACK_CR(%r1)
	lwz	%r4,STACK_XER(%r1)
	ld	%r5,STACK_GPR0(%r1)
	ld	%r6,STACK_GPR1(%r1)
	mtcr	%r3
	mtxer	%r4
	mtspr	SPR_HSPRG0,%r5
	mtspr	SPR_HSPRG1,%r6
	REST_GPR(2,%r1)
	REST_GPR(14,%r1)
	REST_GPR(15,%r1)
	REST_GPR(16,%r1)
	REST_GPR(17,%r1)
	REST_GPR(18,%r1)
	REST_GPR(19,%r1)
	REST_GPR(20,%r1)
	REST_GPR(21,%r1)
	REST_GPR(22,%r1)
	REST_GPR(23,%r1)
	REST_GPR(24,%r1)
	REST_GPR(25,%r1)
	REST_GPR(26,%r1)
	REST_GPR(27,%r1)
	REST_GPR(28,%r1)
	REST_GPR(29,%r1)
	REST_GPR(30,%r1)
	REST_GPR(31,%r1)

	/* Get LR back, pop stack and return */
	/* LR was saved at 16(r1) of the caller's frame before the stdu */
	addi	%r1,%r1,STACK_FRAMESIZE
	ld	%r0,16(%r1)
	mtlr	%r0
	blr

/* Fast reset code. We clean up the TLB and a few SPRs and
 * return to C code. All CPUs do that, the CPU triggering the
 * reset does it to itself last. The C code will sort out who
 * the master is. We come from the trampoline above with
 * r30 containing SKIBOOT_BASE
 */
/*
 * On entry (from reset_wakeup):
 *   r1  = stack base for this PIR
 *   r13 = per-cpu pointer derived from r1
 *   r30 = SKIBOOT_BASE
 */
fast_reset_entry:
	/* Clear out SLB */
	/* slbmte with both operands zero, then slbia */
	li	%r6,0
	slbmte	%r6,%r6
	slbia
	ptesync

	/* Dummy stack frame */
	/* Zero the back chain and the two doublewords after it */
	li	%r3,0
	std	%r3,0(%r1)
	std	%r3,8(%r1)
	std	%r3,16(%r1)

	/* Get our TOC */
	/* r2 = SKIBOOT_BASE + image offset of __toc_start */
	addis	%r2,%r30,(__toc_start - __head)@ha
	addi	%r2,%r2,(__toc_start - __head)@l

	/* Go to C ! */
	bl	fast_reboot_entry
	b	.

/* Functions to initialize replicated and shared SPRs to sane
 * values. This is called at boot and on soft-reset
 */
/*
 * init_shared_sprs: set the SPRs handled once per core (see the call
 * site comment in boot_entry) according to the CPU type read from PVR.
 *
 * Clobbers: r0, r3, r4, r5, cr0. Unknown CPU types only get AMOR
 * cleared.
 */
.global init_shared_sprs
init_shared_sprs:
	li	%r0,0
	mtspr	SPR_AMOR, %r0

	/* Dispatch on PVR[type] (upper 16 bits of the 32-bit PVR) */
	mfspr	%r3,SPR_PVR
	srdi	%r3,%r3,16
	cmpwi	cr0,%r3,PVR_TYPE_P7
	beq	1f
	cmpwi	cr0,%r3,PVR_TYPE_P7P
	beq	2f
	cmpwi	cr0,%r3,PVR_TYPE_P8E
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P8
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P9
	beq	4f
	/* Unsupported CPU type... what do we do ? */
	b	9f

1:	/* P7 */
	/* r0 is still 0 here: SDR1 is cleared */
	mtspr	SPR_SDR1, %r0
	/* TSCR: Value from pHyp */
	LOAD_IMM32(%r3,0x880DE880)
	mtspr	SPR_TSCR, %r3
	b	9f

2:	/* P7+ */
	mtspr	SPR_SDR1, %r0
	/* TSCR: Recommended value by HW folks */
	LOAD_IMM32(%r3,0x88CDE880)
	mtspr	SPR_TSCR, %r3
	b	9f

3:	/* P8E/P8/P8NVL */
	mtspr	SPR_SDR1, %r0
	/* TSCR: Recommended value by HW folks */
	LOAD_IMM32(%r3,0x8ACC6880)
	mtspr	SPR_TSCR, %r3

	/* HID0: Clear bit 13 (enable core recovery)
	 *       Clear bit 19 (HILE)
	 */
	/* Bit numbers are IBM (MSB = 0) numbering, hence 63 - n shifts */
	mfspr	%r3,SPR_HID0
	li	%r0,1
	sldi	%r4,%r0,(63-13)
	sldi	%r5,%r0,(63-19)
	or	%r0,%r4,%r5
	andc	%r3,%r3,%r0
	sync
	mtspr	SPR_HID0,%r3
	/* NOTE(review): the six back-to-back HID0 reads look like a
	 * required hardware sequence after writing HID0 - confirm against
	 * the processor documentation.
	 */
	mfspr	%r3,SPR_HID0
	mfspr	%r3,SPR_HID0
	mfspr	%r3,SPR_HID0
	mfspr	%r3,SPR_HID0
	mfspr	%r3,SPR_HID0
	mfspr	%r3,SPR_HID0
	isync
	/* HMEER: Enable HMIs for core recovery and TOD errors. */
	LOAD_IMM64(%r0,SPR_HMEER_HMI_ENABLE_MASK)
	mfspr	%r3,SPR_HMEER
	or	%r3,%r3,%r0
	sync
	mtspr	SPR_HMEER,%r3
	isync
	/* RPR (per-LPAR but let's treat it as replicated for now) */
	LOAD_IMM64(%r3,0x00000103070F1F3F)
	mtspr	SPR_RPR,%r3
	b	9f

4:	/* P9 */
	/* TSCR: Recommended value by HW folks */
	LOAD_IMM32(%r3,0x80287880)
	mtspr	SPR_TSCR, %r3
	/* HID0: Clear bit 5 (enable core recovery)
	 *       Clear bit 4 (HILE)
	 */
	mfspr	%r3,SPR_HID0
	li	%r0,1
	sldi	%r4,%r0,(63-5)
	sldi	%r5,%r0,(63-4)
	or	%r0,%r4,%r5
	andc	%r3,%r3,%r0
	sync
	mtspr	SPR_HID0,%r3
	isync
	/* HMEER: Enable HMIs for core recovery and TOD errors. */
	LOAD_IMM64(%r0,SPR_HMEER_HMI_ENABLE_MASK)
	mfspr	%r3,SPR_HMEER
	or	%r3,%r3,%r0
	sync
	mtspr	SPR_HMEER,%r3
	isync

	/* RPR: same value as written on the P8 path above */
	LOAD_IMM64(%r3,0x00000103070F1F3F)
	mtspr	SPR_RPR,%r3
9:	blr

/*
 * init_replicated_sprs: set the SPRs handled on every thread (LPCR and
 * DSCR) according to the CPU type read from PVR.
 *
 * Clobbers: r3, cr0. Unknown CPU types are left untouched.
 */
.global init_replicated_sprs
init_replicated_sprs:
	/* Dispatch on PVR[type] (upper 16 bits of the 32-bit PVR) */
	mfspr	%r3,SPR_PVR
	srdi	%r3,%r3,16
	cmpwi	cr0,%r3,PVR_TYPE_P7
	beq	1f
	cmpwi	cr0,%r3,PVR_TYPE_P7P
	beq	1f
	cmpwi	cr0,%r3,PVR_TYPE_P8E
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P8
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
	beq	3f
	cmpwi	cr0,%r3,PVR_TYPE_P9
	beq	4f
	/* Unsupported CPU type... what do we do ? */
	b	9f

1:	/* P7, P7+ */
	/* LPCR: sane value */
	LOAD_IMM64(%r3,0x0040000000000004)
	mtspr	SPR_LPCR, %r3
	sync
	isync
	/* DSCR: cleared */
	LOAD_IMM64(%r3,0x0)
	mtspr	SPR_DSCR,%r3
	b	9f

3:	/* P8, P8E, P8NVL */
	/* LPCR: sane value */
	LOAD_IMM64(%r3,0x0040000000000000)
	mtspr	SPR_LPCR, %r3
	sync
	isync
	/* DSCR: cleared */
	LOAD_IMM64(%r3,0x0)
	mtspr	SPR_DSCR,%r3
	b	9f

4:	/* P9 */
	/* LPCR: sane value */
	LOAD_IMM64(%r3,0x0040000000000000)
	mtspr	SPR_LPCR, %r3
	sync
	isync
	/* DSCR: Stride-N Stream Enable */
	LOAD_IMM64(%r3,0x0000000000000010)
	mtspr	SPR_DSCR,%r3

9:	blr

	/*
	 * enter_nap: execute the nap instruction without saving any state.
	 *
	 * r0 is stored to 0(r1), read back and compared with itself (the
	 * branch is never taken) before nap; the code after nap only spins.
	 * Note the store overwrites the doubleword at 0(r1).
	 */
	.global enter_nap
enter_nap:
	std	%r0,0(%r1)
	ptesync
	ld	%r0,0(%r1)
1:	cmp	%cr0,0,%r0,%r0
	bne	1b
	nap
	b	.
/*
 *
 * NACA structure, accessed by the FSP to find the SPIRA
 *
 */
	/*
	 * Fixed at image offset 0x4000. The offsets in the comments below
	 * are relative to the start of the structure; the .space
	 * directives pad between the documented fields. The trailing
	 * .space 0xe38 pads the structure out to 0x1000 bytes.
	 */
	. = 0x4000
.global naca
naca:
	.llong	spirah			/* 0x0000 : SPIRA-H  */
	.llong	0			/* 0x0008 : Reserved */
	.llong	0			/* 0x0010 : Reserved */
	.llong	hv_release_data		/* 0x0018 : HV release data */
	.llong	0			/* 0x0020 : Reserved */
	.llong	0			/* 0x0028 : Reserved */
	.llong	spira			/* 0x0030 : SP Interface Root */
	.llong	hv_lid_load_table	/* 0x0038 : LID load table */
	.llong	0			/* 0x0040 : Reserved */
	.space	68
	.long	0			/* 0x008c : Reserved */
	.space	16
	.long	SPIRA_ACTUAL_SIZE	/* 0x00a0 : Actual size of SPIRA */
	.space	28
	.llong	0			/* 0x00c0 : resident module loadmap */
	.space	136
	.llong	0			/* 0x0150 : reserved */
	.space	40
	.llong	0			/* 0x0180 : reserved */
	.space	36
	.long	0			/* 0x01ac : control flags */
	.byte	0			/* 0x01b0 : reserved */
	.space	4
	.byte	0			/* 0x01b5 : default state for SW attn */
	.space	1
	.byte	0x01			/* 0x01b7 : PCIA format */
	.llong	hdat_entry		/* 0x01b8 : Primary thread entry */
	.llong	hdat_entry		/* 0x01c0 : Secondary thread entry */
	.space	0xe38

	/* HV release data, pointed to by naca + 0x18: 58 bytes of zeroes
	 * followed by one doubleword.
	 */
	.balign	0x10
hv_release_data:
	.space	58
	.llong	0x666			/* VRM ? */

	/* LID load table, pointed to by naca + 0x38: four 32-bit words.
	 * NOTE(review): the meaning of the two 0x10 values is not visible
	 * in this file - confirm against the HDAT specification.
	 */
	.balign	0x10
hv_lid_load_table:
	.long	0x10
	.long	0x10
	.long	0
	.long	0

/*
 *
 * OPAL variant of NACA. This is only used when booting a P7 in OPAL mode.
 *
 */
/* Pointed to by the doubleword at __head (image offset 0) */
.global opal_naca
opal_naca:
	.llong	opal_boot_trampoline	/* Primary entry (used ?) */
	.llong	opal_boot_trampoline	/* Secondary entry (used ?) */
	.llong	spira			/* Spira pointer */
	.llong	0			/* Load address */
	.llong	opal_boot_trampoline	/* 0x180 trampoline */
	.llong	0			/* More stuff as seen in objdump ...*/
	.llong	0
	.llong	0
	.llong	0

	/* The FSP seems to ignore our primary/secondary entry
	 * points and instead copy that bit down to 0x180 and
	 * patch the first instruction to get our expected
	 * boot CPU number. We ignore that patching for now and
	 * got to the same entry we use for pHyp and FDT HB.
	 */
	/* r27 = -1 on this path, then an absolute branch ("ba") to the
	 * image offset of boot_entry, so the two instructions work from
	 * wherever they have been copied to.
	 */
opal_boot_trampoline:
	li	%r27,-1
	ba	boot_entry - __head

/*
 *
 * OPAL entry point from operating system
 *
 * Register usage:
 *
 *       r0: Token
 *       r2: OPAL Base
 *  r3..r10: Args
 * r11..r12: Scratch
 * r13..r31: Preserved
 */
	.balign	0x10
.global opal_entry
opal_entry:
	/*
	 * Until the OPAL stack frame is set up further down, r1, r13 and
	 * LR are still the caller's. Only r11, r12 and cr0 may be used.
	 */

	/* Get our per CPU pointer in r12 to check for quiesce */
	mfspr	%r12,SPR_PIR
	GET_STACK(%r12,%r12)

	/* Get CPU thread */
	clrrdi	%r12,%r12,STACK_SHIFT

	/*
	 * OPAL entry must first increment in_opal_call, then check
	 * for quiesce, without touching the stack or clobbering
	 * registers other than r11 and r12 and cr0. In this way, OPAL
	 * is tolerant of re-entry on this same CPU while it is spinning
	 * for quiesce.
	 *
	 * Sequence goes:
	 * in_opal_call++;
	 * sync;
	 * if (quiesce_opal_call) {
	 *     in_opal_call--;
	 *     reject-or-spin-then-retry;
	 */
1:	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	addi	%r11,%r11,1
	stw	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	/*
	 * Order the store in_opal_call vs load quiesce_opal_call.
	 * This also provides an acquire barrier for opal entry vs
	 * another thread quiescing opal. In this way, quiescing
	 * can behave as mutual exclusion.
	 */
	sync
	lwz	%r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
	cmpwi	%cr0,%r11,0
	beq+	4f
	/* We are quiescing, hold or reject */
	cmpwi	%cr0,%r11,QUIESCE_REJECT
	bne	2f
	/*
	 * Reject: return OPAL_BUSY straight to the caller. No OPAL stack
	 * frame exists yet, so we must not go through .Lreturn (which
	 * reloads LR, r13 and r1 from that frame). r12 already holds the
	 * per-cpu pointer and r1/r13/LR are untouched, which is exactly
	 * the state expected at .Lreject.
	 */
	li	%r3,OPAL_BUSY
	b	.Lreject
2:	/* hold */
	/* Undo our in_opal_call increment while we wait */
	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	subi	%r11,%r11,1
	stw	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	smt_lowest
3:	lwz	%r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
	cmpwi	%cr0,%r11,QUIESCE_HOLD
	beq	3b
	/* spin finished, try again */
	smt_medium
	b	1b

4:	/* Quiesce protocol done, get our per CPU stack */
	/* Emergency stack if we have re-entered OPAL */
	/* in_opal_call > 1 means another OPAL call is already active on
	 * this CPU. The compare result in cr0 survives the mfspr below.
	 */
	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	cmpwi	%r11,1

	mfspr	%r12,SPR_PIR
	bgt	5f
	GET_STACK(%r12,%r12)
	b	6f
5:
	GET_EMERGENCY_STACK(%r12,%r12)
6:
	/* Carve the entry frame; r12 becomes the new stack pointer */
	stdu	%r12,-STACK_FRAMESIZE(%r12)

	/* Save caller r1, establish new r1 */
	/* A non-zero back chain marks entry from the OS */
	std	%r1,0(%r12)
	std	%r1,STACK_GPR1(%r12)
	mr	%r1,%r12

	/* Save arguments because we call C */
	std	%r3,STACK_GPR3(%r1)
	std	%r4,STACK_GPR4(%r1)
	std	%r5,STACK_GPR5(%r1)
	std	%r6,STACK_GPR6(%r1)
	std	%r7,STACK_GPR7(%r1)
	std	%r8,STACK_GPR8(%r1)
	std	%r9,STACK_GPR9(%r1)
	std	%r10,STACK_GPR10(%r1)

	/* Save Token (r0), LR and r13 */
	mflr	%r12
	std	%r0,STACK_GPR0(%r1)
	std	%r13,STACK_GPR13(%r1)
	std	%r12,STACK_LR(%r1)

	/* Get the CPU thread */
	GET_CPU()

	/* Store token in CPU thread */
	std	%r0,CPUTHREAD_CUR_TOKEN(%r13)

	/* Mark the stack frame */
	li	%r12,STACK_ENTRY_OPAL_API
	std	%r12,STACK_TYPE(%r1)

	/* Get our TOC */
	/* r2 is the OPAL base on entry (see register usage above) */
	addis	%r2,%r2,(__toc_start - __head)@ha
	addi	%r2,%r2,(__toc_start - __head)@l

	/* Check entry */
	/* opal_entry_check(frame): a non-zero result is returned to the
	 * caller as the call's return value.
	 */
	mr	%r3,%r1
	bl	opal_entry_check
	cmpdi	%r3,0
	bne	.Lreturn

	/* Reload token and arguments clobbered by the C call */
	ld	%r0,STACK_GPR0(%r1)
	ld	%r3,STACK_GPR3(%r1)
	ld	%r4,STACK_GPR4(%r1)
	ld	%r5,STACK_GPR5(%r1)
	ld	%r6,STACK_GPR6(%r1)
	ld	%r7,STACK_GPR7(%r1)
	ld	%r8,STACK_GPR8(%r1)
	ld	%r9,STACK_GPR9(%r1)
	ld	%r10,STACK_GPR10(%r1)

	/* Convert our token into a table entry and get the
	 * function pointer. Also check the token.
	 * For ELFv2 ABI, the local entry point is used so no need for r12.
	 */
	sldi	%r0,%r0,3
	LOAD_ADDR_FROM_TOC(%r12, opal_branch_table)
	ldx	%r0,%r12,%r0
	mtctr	%r0

	/* Jump ! */
	bctrl

	mr	%r4,%r1
	bl	opal_exit_check /* r3 is preserved */

	/*
	 * Restore r1 and r13 before decrementing in_opal_call.
	 * Move per-cpu pointer to volatile r12, restore lr, r1, r13.
	 */
.Lreturn:
	ld	%r12,STACK_LR(%r1)
	mtlr	%r12
	mr	%r12,%r13
	ld	%r13,STACK_GPR13(%r1)
	ld	%r1,STACK_GPR1(%r1)
	/*
	 * Common exit, also the target of the quiesce reject path.
	 * Expects: r12 = per-cpu pointer, r1/r13/LR = caller's values,
	 * r3 = return value.
	 */
.Lreject:
	sync 	/* release barrier vs quiescing */
	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	subi	%r11,%r11,1
	stw	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
	blr

/*
 * start_kernel: jump to a loaded payload.
 *
 * In:   r3 = entry address
 *       r4 = value to hand over in r3
 *       r5 = value handed over, plus one, in r7
 *
 * Register state at the jump:
 *       r3  = incoming r4
 *       r4  = 0
 *       r5  = 0
 *       r6  = EPAPR_MAGIC
 *       r7  = incoming r5 + 1
 *       r8  = SKIBOOT_BASE
 *       r9  = SKIBOOT_BASE + image offset of opal_entry
 *       r10 = image offset of opal_entry
 *
 * Does not return.
 */
.global start_kernel
start_kernel:
	/* Synchronize instruction fetch with the entry point, then
	 * stage it in CTR.
	 */
	sync
	icbi	0,%r3
	sync
	isync
	mtctr	%r3

	/* Consume the incoming arguments before zeroing their registers */
	addi	%r7,%r5,1
	mr	%r3,%r4
	li	%r5,0
	li	%r4,0

	/* Constants handed to the payload */
	LOAD_IMM32(%r6, EPAPR_MAGIC)
	LOAD_IMM32(%r10, opal_entry - __head)
	LOAD_IMM64(%r8,SKIBOOT_BASE)
	add	%r9,%r8,%r10
	bctr

	/*
	 * start_kernel32: same as start_kernel, but first clears the
	 * top bit of the MSR (clrldi by 1), i.e. MSR[SF], so the payload is
	 * entered in 32-bit mode. Arguments are those of start_kernel;
	 * additionally clobbers r10 before start_kernel reloads it.
	 */
	.global start_kernel32
start_kernel32:
	mfmsr	%r10
	clrldi	%r10,%r10,1
	mtmsrd	%r10,0
	sync
	isync
	b	start_kernel

/*
 * start_kernel_secondary: jump to the address in r3 with r3 = this
 * thread's PIR. Does not return.
 */
.global start_kernel_secondary
start_kernel_secondary:
	sync
	isync
	mtctr	%r3
	mfspr	%r3,SPR_PIR
	bctr