about summary refs log tree commit diff
diff options
context:
space:
mode:
author Benjamin Herrenschmidt <benh@kernel.crashing.org> 2016-07-24 09:32:10 +1000
committer Stewart Smith <stewart@linux.vnet.ibm.com> 2016-10-17 10:33:28 +1100
commit 0279d8951ead549fdebce93130a2f6c673081862 (patch)
tree b0fee12867dbf0a4796ec98b3d4b12907d604ab0
parent 8110b0595f0de2df18a06e8e9aff66db45872fa7 (diff)
download skiboot-0279d8951ead549fdebce93130a2f6c673081862.zip
skiboot-0279d8951ead549fdebce93130a2f6c673081862.tar.gz
skiboot-0279d8951ead549fdebce93130a2f6c673081862.tar.bz2
Fast reboot for P8
This is an experimental patch that implements "Fast reboot" on P8 machines. The basic idea is that when the OS calls OPAL reboot, we gather all the threads in the system using a combination of patching the reset vector and soft-resetting them, then clean up a few bits of hardware (we do re-probe PCIe, for example), and reload & restart the bootloader. For Trusted Boot, this means we *add* measurements to the TPM, so you will get *different* PCR values as compared to a full IPL. This makes sense: if you want to be sure you are running something known, then, well, do a full IPL, as a soft reset should never be trusted to clear any malicious code. This is very experimental and needs a lot of testing and also auditing of code for other bits of HW that might need to be cleaned up. BenH TODO: I also need to check if we are properly PERST'ing PCI devices. This is partially based on old code I had to do that on P7. I only support it on P8, though, as there are issues with the PSI interrupts on P7 that cannot be reliably solved. Even though this should be considered somewhat experimental, we've had a lot of success on a variety of machines: dozens/hundreds of reboots across Tuleta, Garrison and Habanero. Currently, we've hidden it behind an NVRAM config option, which *is* liable to change in the future (to ensure that only those who know what they're doing enable it). You can enable the experimental support via the nvram option: nvram -p ibm,skiboot --update-config experimental-fast-reset=feeling-lucky Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [stewart@linux.vnet.ibm.com: hide behind nvram option, include Mambo fixes from Mikey] Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
-rw-r--r-- asm/head.S 122
-rw-r--r-- core/fast-reboot.c 538
-rw-r--r-- core/init.c 8
-rw-r--r-- core/lock.c 3
-rw-r--r-- core/pci.c 12
-rw-r--r-- core/platform.c 9
-rw-r--r-- hw/fsp/fsp-console.c 5
-rw-r--r-- hw/fsp/fsp-leds.c 3
-rw-r--r-- hw/occ.c 5
-rw-r--r-- hw/psi.c 103
-rw-r--r-- hw/slw.c 7
-rw-r--r-- include/config.h 3
-rw-r--r-- include/cpu.h 1
-rw-r--r-- include/processor.h 1
-rw-r--r-- include/skiboot.h 9
-rw-r--r-- platforms/mambo/mambo.c 10
16 files changed, 527 insertions, 312 deletions
diff --git a/asm/head.S b/asm/head.S
index a4105ca..4f70d11 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -458,64 +458,6 @@ call_relocate:
1: /* Fatal relocate failure */
attn
-/* This is a little piece of code that is copied down to
- * 0x100 when doing a "fast reset"
- */
-.global fast_reset_patch_start
-fast_reset_patch_start:
- smt_medium
- LOAD_IMM64(%r30, SKIBOOT_BASE)
- LOAD_IMM32(%r3, fast_reset_entry - __head)
- add %r3,%r30,%r3
- mtctr %r3
- bctr
-.global fast_reset_patch_end
-fast_reset_patch_end:
-
-/* Fast reset code. We clean up the TLB and a few SPRs and
- * return to C code. All CPUs do that, the CPU triggering the
- * reset does it to itself last. The C code will sort out who
- * the master is. We come from the trampoline above with
- * r30 containing SKIBOOT_BASE
- */
-fast_reset_entry:
- /* Clear out SLB */
- li %r6,0
- slbmte %r6,%r6
- slbia
- ptesync
-
- /* Get PIR */
- mfspr %r31,SPR_PIR
-
- /* Get a stack and restore r13 */
- GET_STACK(%r1,%r31)
- li %r3,0
- std %r3,0(%r1)
- std %r3,8(%r1)
- std %r3,16(%r1)
- GET_CPU()
-
- /* Get our TOC */
- addis %r2,%r30,(__toc_start - __head)@ha
- addi %r2,%r2,(__toc_start - __head)@l
-
- /* Go to C ! */
- bl fast_reboot
- b .
-
-.global cleanup_tlb
-cleanup_tlb:
- /* Clean the TLB */
- li %r3,128
- mtctr %r3
- li %r4,0x800 /* IS field = 0b10 */
- ptesync
-1: tlbiel %r4
- addi %r4,%r4,0x1000
- bdnz 1b
- ptesync
-
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
b $+36; /* Skip trampoline if endian is good */ \
@@ -624,7 +566,12 @@ reset_wakeup:
GET_CPU()
/* Restore original stack pointer */
- ld %r1,CPUTHREAD_SAVE_R1(%r13)
+ ld %r3,CPUTHREAD_SAVE_R1(%r13)
+
+ /* If it's 0, we are doing a fast reboot */
+ cmpldi %r3,0
+ beq fast_reset_entry
+ mr %r1,%r3
/* Restore more stuff */
lwz %r3,STACK_CR(%r1)
@@ -661,6 +608,46 @@ reset_wakeup:
mtlr %r0
blr
+/* Fast reset code. We clean up the TLB and a few SPRs and
+ * return to C code. All CPUs do that, the CPU triggering the
+ * reset does it to itself last. The C code will sort out who
+ * the master is. We come from the trampoline above with
+ * r30 containing SKIBOOT_BASE
+ */
+fast_reset_entry:
+ /* Clear out SLB */
+ li %r6,0
+ slbmte %r6,%r6
+ slbia
+ ptesync
+
+ /* Dummy stack frame */
+ li %r3,0
+ std %r3,0(%r1)
+ std %r3,8(%r1)
+ std %r3,16(%r1)
+
+ /* Get our TOC */
+ addis %r2,%r30,(__toc_start - __head)@ha
+ addi %r2,%r2,(__toc_start - __head)@l
+
+ /* Go to C ! */
+ bl fast_reboot_entry
+ b .
+
+.global cleanup_tlb
+cleanup_tlb:
+ /* Clean the TLB */
+ li %r3,512
+ mtctr %r3
+ li %r4,0xc00 /* IS field = 0b11 */
+ ptesync
+1: tlbiel %r4
+ addi %r4,%r4,0x1000
+ bdnz 1b
+ ptesync
+ blr
+
/* Functions to initialize replicated and shared SPRs to sane
* values. This is called at boot and on soft-reset
*/
@@ -708,10 +695,14 @@ init_shared_sprs:
mtspr SPR_LPCR,%r3
sync
isync
- /* HID0: Clear bit 13 (enable core recovery) */
+ /* HID0: Clear bit 13 (enable core recovery)
+ * Clear bit 19 (HILE)
+ */
mfspr %r3,SPR_HID0
li %r0,1
- sldi %r0,%r0,(63-13)
+ sldi %r4,%r0,(63-13)
+ sldi %r5,%r0,(63-19)
+ or %r0,%r4,%r5,
andc %r3,%r3,%r0
sync
mtspr SPR_HID0,%r3
@@ -743,6 +734,15 @@ init_replicated_sprs:
/* XXX TODO: Add more */
blr
+ .global enter_nap
+enter_nap:
+ std %r0,0(%r1)
+ ptesync
+ ld %r0,0(%r1)
+1: cmp %cr0,%r0,%r0
+ bne 1b
+ nap
+ b .
/*
*
* NACA structure, accessed by the FPS to find the SPIRA
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 30b77e9..cf8b3d4 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -25,242 +25,420 @@
#include <timebase.h>
#include <pci.h>
#include <chip.h>
+#include <chiptod.h>
+
+#define P8_EX_TCTL_DIRECT_CONTROLS(t) (0x10013000 + (t) * 0x10)
+#define P8_DIRECT_CTL_STOP PPC_BIT(63)
+#define P8_DIRECT_CTL_PRENAP PPC_BIT(47)
+#define P8_DIRECT_CTL_SRESET PPC_BIT(60)
-/*
- * To get control of all threads, we sreset them via XSCOM after
- * patching the 0x100 vector. This will work as long as the target
- * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider
- * a more messy approach.
- *
- * The SCOM register we want is called "Core RAS Control" in the doc
- * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list
- *
- * Bits in there change from CPU rev to CPU rev but the bit we care
- * about, bit 60 "sreset_request" appears to have stuck to the same
- * place in both P7 and P7+. The register also has the same SCOM
- * address
- */
-#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400
-#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440
-#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480
-#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0
-#define TCTL_DC_SRESET_REQUEST PPC_BIT(60)
/* Flag tested by the OPAL entry code */
uint8_t reboot_in_progress;
-static struct cpu_thread *resettor, *resettee;
+static volatile bool fast_boot_release;
+static struct cpu_thread *last_man_standing;
+static struct lock reset_lock = LOCK_UNLOCKED;
-static void flush_caches(void)
+static int set_special_wakeup(struct cpu_thread *cpu)
{
- uint64_t base = SKIBOOT_BASE;
- uint64_t end = base + SKIBOOT_SIZE;
+ uint64_t val, poll_target, stamp;
+ uint32_t core_id;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
- /* Not sure what the effect of sreset is on cores, so let's
- * shoot a series of dcbf's on all cachelines that make up
- * our core memory just in case...
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
+
+ prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id);
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return OPAL_SUCCESS;
+
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
*/
- while(base < end) {
- asm volatile("dcbf 0,%0" : : "r" (base) : "memory");
- base += 128;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP),
+ PPC_BIT(0));
+ if (rc) {
+ prerror("RESET: XSCOM error %d asserting special"
+ " wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
}
- sync();
+
+ /*
+ * HWP uses the history for Perf register here, dunno why it uses
+ * that one instead of the pHyp one, maybe to avoid clobbering it...
+ *
+ * In any case, it does that to check for run/nap vs.sleep/winkle/other
+ * to decide whether to poll on checkstop or not. Since we don't deal
+ * with checkstop conditions here, we ignore that part.
+ */
+
+ /*
+ * Now poll for completion of special wakeup. The HWP is nasty here,
+ * it will poll at 5ms intervals for up to 200ms. This is not quite
+ * acceptable for us at runtime, at least not until we have the
+ * ability to "context switch" HBRT. In practice, because we don't
+ * winkle, it will never take that long, so we increase the polling
+ * frequency to 1us per poll. However we do have to keep the same
+ * timeout.
+ *
+ * We don't use time_wait_ms() either for now as we don't want to
+ * poll the FSP here.
+ */
+ stamp = mftb();
+ poll_target = stamp + msecs_to_tb(200);
+ val = 0;
+ while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) {
+ /* Wait 1 us */
+ time_wait_us(1);
+
+ /* Read PM state */
+ rc = xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0),
+ &val);
+ if (rc) {
+ prerror("RESET: XSCOM error %d reading PM state on"
+ " 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+ /* Check timeout */
+ if (mftb() > poll_target)
+ break;
+ }
+
+ /* Success ? */
+ if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) {
+ uint64_t now = mftb();
+ prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n",
+ tb_to_usecs(now - stamp));
+ return 0;
+ }
+
+ /*
+ * We timed out ...
+ *
+ * HWP has a complex workaround for HW255321 which affects
+ * Murano DD1 and Venice DD1. Ignore that for now
+ *
+ * Instead we just dump some XSCOMs for error logging
+ */
+ prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir);
+ prerror("RESET: PM0 = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+ prerror("RESET: SPC_WKUP = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_IDLE_STATE_HISTORY_PHYP),
+ &val);
+ prerror("RESET: HISTORY = 0x%016llx\n", val);
+
+ return OPAL_HARDWARE;
}
-static bool do_reset_core_p7(struct cpu_thread *cpu)
+static int clr_special_wakeup(struct cpu_thread *cpu)
{
- uint32_t xscom_addr, chip;
- uint64_t ctl;
+ uint64_t val;
+ uint32_t core_id;
int rc;
- /* Add the Core# */
- xscom_addr = EX0_TCTL_DIRECT_CONTROLS0;
- xscom_addr |= ((cpu->pir >> 2) & 7) << 24;
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
- chip = pir_to_chip_id(cpu->pir);
+ prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id);
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return OPAL_SUCCESS;
- ctl = TCTL_DC_SRESET_REQUEST;
- rc = xscom_write(chip, xscom_addr, ctl);
- rc |= xscom_write(chip, xscom_addr + 0x40, ctl);
- rc |= xscom_write(chip, xscom_addr + 0x80, ctl);
- rc |= xscom_write(chip, xscom_addr + 0xc0, ctl);
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP), 0);
if (rc) {
- prerror("RESET: Error %d resetting CPU 0x%04x\n",
- rc, cpu->pir);
- return false;
+ prerror("RESET: XSCOM error %d deasserting"
+ " special wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
}
- return true;
+
+ /*
+ * The original HWp reads the XSCOM again with the comment
+ * "This puts an inherent delay in the propagation of the reset
+ * transition"
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ return 0;
}
-static void fast_reset_p7(void)
+extern unsigned long callthru_tcl(const char *str, int len);
+
+static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
+{
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t xscom_addr;
+ char tcl_cmd[50];
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ if (bits != P8_DIRECT_CTL_SRESET)
+ return;
+ snprintf(tcl_cmd, sizeof(tcl_cmd), "mysim cpu %i:%i set spr pc 0x100", core_id, thread_id);
+ callthru_tcl(tcl_cmd, strlen(tcl_cmd));
+ return;
+ }
+
+ xscom_addr = XSCOM_ADDR_P8_EX(core_id,
+ P8_EX_TCTL_DIRECT_CONTROLS(thread_id));
+
+ xscom_write(chip_id, xscom_addr, bits);
+}
+
+static bool fast_reset_p8(void)
{
struct cpu_thread *cpu;
- resettee = this_cpu();
- resettor = NULL;
+ /* Mark ourselves as last man standing in need of a reset */
+ last_man_standing = this_cpu();
- /* Pick up a candidate resettor. We do that before we flush
- * the caches
- */
+ prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n",
+ this_cpu()->pir, pir_to_core_id(this_cpu()->pir));
+
+ /* Assert special wakup on all cores */
for_each_cpu(cpu) {
- /*
- * Some threads might still be in skiboot.
- *
- * But because we deal with entire cores and we don't want
- * to special case things, we are just going to reset them
- * too making the assumption that this is safe, they are
- * holding no locks. This can only be true if they don't
- * have jobs scheduled which is hopefully the case.
- */
- if (cpu->state != cpu_state_os &&
- cpu->state != cpu_state_active)
- continue;
+ if (cpu->primary == cpu)
+ if (set_special_wakeup(cpu) != OPAL_SUCCESS)
+ return false;
+ }
- /*
- * Only hit cores and only if they aren't on the same core
- * as ourselves
- */
- if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
- cpu->pir & 0x3)
- continue;
+ prlog(PR_DEBUG, "RESET: Stopping the world...\n");
- /* Pick up one of those guys as our "resettor". It will be
- * in charge of resetting this CPU. We avoid resetting
- * ourselves, not sure how well it would do with SCOM
- */
- resettor = cpu;
- break;
- }
+ /* Put everybody in stop except myself */
+ for_each_cpu(cpu) {
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
- if (!resettor) {
- printf("RESET: Can't find a resettor !\n");
- return;
+ /* Also make sure that saved_r1 is 0 ! That's what will
+ * make our reset vector jump to fast_reboot_entry
+ */
+ cpu->save_r1 = 0;
}
- printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n",
- this_cpu()->pir, resettor->pir);
- printf("RESET: Flushing caches...\n");
+ /* Restore skiboot vectors */
+ copy_exception_vectors();
+ setup_reset_vector();
- /* Is that necessary ? */
- flush_caches();
+ prlog(PR_DEBUG, "RESET: Pre-napping all threads but one...\n");
- /* Reset everybody except self and except resettor */
+ /* Put everybody in pre-nap except myself */
for_each_cpu(cpu) {
- if (cpu->state != cpu_state_os &&
- cpu->state != cpu_state_active)
- continue;
- if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
- cpu->pir & 0x3)
- continue;
- if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor))
- continue;
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
+ }
- printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir);
+ prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
- if (!do_reset_core_p7(cpu))
- return;
+ /* Reset everybody except my own core threads */
+ for_each_cpu(cpu) {
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
}
- /* Reset the resettor last because it's going to kill me ! */
- printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir);
- if (!do_reset_core_p7(resettor))
- return;
-
- /* Don't return */
- for (;;)
- ;
+ return true;
}
-void fast_reset(void)
+void fast_reboot(void)
{
- uint32_t pvr = mfspr(SPR_PVR);
- extern uint32_t fast_reset_patch_start;
- extern uint32_t fast_reset_patch_end;
- uint32_t *dst, *src;
+ bool success;
+
+ if (proc_gen != proc_gen_p8) {
+ prlog(PR_DEBUG,
+ "RESET: Fast reboot not available on this CPU\n");
+ return;
+ }
+ if (chip_quirk(QUIRK_NO_DIRECT_CTL)) {
+ prlog(PR_DEBUG,
+ "RESET: Fast reboot disabled by quirk\n");
+ return;
+ }
- printf("RESET: Fast reboot request !\n");
+ prlog(PR_INFO, "RESET: Initiating fast reboot...\n");
/* XXX We need a way to ensure that no other CPU is in skiboot
* holding locks (via the OPAL APIs) and if they are, we need
- * for them to get out
+ * for them to get out. Hopefully that isn't happening, but...
+ *
+ * To fix this properly, we want to keep track of OPAL entry/exit
+ * on all CPUs.
*/
reboot_in_progress = 1;
time_wait_ms(200);
- /* Copy reset trampoline */
- printf("RESET: Copying reset trampoline...\n");
- src = &fast_reset_patch_start;
- dst = (uint32_t *)0x100;
- while(src < &fast_reset_patch_end)
- *(dst++) = *(src++);
- sync_icache();
-
- switch(PVR_TYPE(pvr)) {
- case PVR_TYPE_P7:
- case PVR_TYPE_P7P:
- fast_reset_p7();
+ /* Lock so the new guys coming don't reset us */
+ lock(&reset_lock);
+
+ fast_boot_release = false;
+
+ success = fast_reset_p8();
+
+ /* Unlock, at this point we go away */
+ unlock(&reset_lock);
+
+ if (success) {
+ if (!next_cpu(first_cpu()))
+ /* Only 1 CPU, so fake reset ourselves */
+ asm volatile("ba 0x100 " : : : );
+ /* Don't return */
+ for (;;)
+ ;
}
}
static void cleanup_cpu_state(void)
{
- if (cpu_is_thread0(this_cpu())) {
- cleanup_tlb();
+ struct cpu_thread *cpu = this_cpu();
+
+ cpu->current_hile = false;
+
+ /* Per core cleanup */
+ if (cpu_is_thread0(cpu)) {
+ /* Shared SPRs whacked back to normal */
+
+ /* XXX Update the SLW copies ! Also dbl check HIDs etc... */
init_shared_sprs();
+
+ /* If somebody was in fast_sleep, we may have a workaround
+ * to undo
+ */
+ if (cpu->in_fast_sleep) {
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep"
+ " undoing workarounds...\n", cpu->pir);
+ fast_sleep_exit();
+ }
+
+ /* And we might have lost TB sync */
+ chiptod_wakeup_resync();
+
+ /* The TLB surely contains garbage */
+ cleanup_tlb();
}
+
+ /* Per-thread additional cleanup */
init_replicated_sprs();
- reset_cpu_icp();
+
+ // XXX Cleanup SLW, check HIDs ...
}
-#ifdef FAST_REBOOT_CLEARS_MEMORY
-static void fast_mem_clear(uint64_t start, uint64_t end)
+void __noreturn enter_nap(void);
+
+static void check_split_core(void)
{
- printf("MEMORY: Clearing %llx..%llx\n", start, end);
+ struct cpu_thread *cpu;
+ u64 mask, hid0;
+
+ hid0 = mfspr(SPR_HID0);
+ mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE;
- while(start < end) {
- asm volatile("dcbz 0,%0" : : "r" (start) : "memory");
- start += 128;
+ if ((hid0 & mask) == 0)
+ return;
+
+ prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir);
+
+ /* If it's a secondary thread, just send it to nap */
+ if (this_cpu()->pir & 7) {
+ /* Prepare to be woken up */
+ icp_prep_for_pm();
+ /* Setup LPCR to wakeup on external interrupts only */
+ mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) |
+ SPR_LPCR_P8_PECE2));
+ /* Go to nap (doesn't return) */
+ enter_nap();
}
-}
-static void memory_reset(void)
-{
- struct address_range *i;
- uint64_t skistart = SKIBOOT_BASE;
- uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE;
-
- printf("MEMORY: Clearing ...\n");
-
- list_for_each(&address_ranges, i, list) {
- uint64_t start = cleanup_addr(i->arange->start);
- uint64_t end = cleanup_addr(i->arange->end);
-
- if (start >= skiend || end <= skistart)
- fast_mem_clear(start, end);
- else {
- if (start < skistart)
- fast_mem_clear(start, skistart);
- if (end > skiend)
- fast_mem_clear(skiend, end);
- }
+ prlog(PR_INFO, "RESET: Primary, unsplitting... \n");
+
+ /* Trigger unsplit operation and update SLW image */
+ hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS;
+ set_hid0(hid0);
+ opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0);
+
+ /* Wait for unsplit */
+ while (mfspr(SPR_HID0) & mask)
+ cpu_relax();
+
+ /* Now the guys are sleeping, wake'em up. They will come back
+ * via reset and continue the fast reboot process normally.
+ * No need to wait.
+ */
+ prlog(PR_INFO, "RESET: Waking unsplit secondaries... \n");
+
+ for_each_cpu(cpu) {
+ if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu()))
+ continue;
+ icp_kick_cpu(cpu);
}
}
-#endif /* FAST_REBOOT_CLEARS_MEMORY */
+
/* Entry from asm after a fast reset */
-void __noreturn fast_reboot(void);
+void __noreturn fast_reboot_entry(void);
-void __noreturn fast_reboot(void)
+void __noreturn fast_reboot_entry(void)
{
- static volatile bool fast_boot_release;
struct cpu_thread *cpu;
- printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir);
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir);
+ time_wait_ms(100);
- /* If this CPU was chosen as the resettor, it must reset the
- * resettee (the one that initiated the whole process
+ lock(&reset_lock);
+ if (last_man_standing && next_cpu(first_cpu())) {
+ prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
+ set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
+ set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
+ }
+ last_man_standing = NULL;
+ unlock(&reset_lock);
+
+ /* We reset our ICP first ! Otherwise we might get stray interrupts
+ * when unsplitting
+ */
+ reset_cpu_icp();
+
+ /* If we are split, we need to unsplit. Since that can send us
+ * to NAP, which will come back via reset, we do it now
*/
- if (this_cpu() == resettor)
- do_reset_core_p7(resettee);
+ check_split_core();
/* Are we the original boot CPU ? If not, we spin waiting
* for a relase signal from CPU 1, then we clean ourselves
@@ -277,8 +455,10 @@ void __noreturn fast_reboot(void)
__secondary_cpu_entry();
}
+ prlog(PR_INFO, "RESET: Boot CPU waiting for everybody...\n");
+
/* We are the original boot CPU, wait for secondaries to
- * be captured
+ * be captured.
*/
for_each_cpu(cpu) {
if (cpu == this_cpu())
@@ -292,7 +472,7 @@ void __noreturn fast_reboot(void)
smt_medium();
}
- printf("INIT: Releasing secondaries...\n");
+ prlog(PR_INFO, "RESET: Releasing secondaries...\n");
/* Release everybody */
fast_boot_release = true;
@@ -310,7 +490,14 @@ void __noreturn fast_reboot(void)
}
}
- printf("INIT: All done, resetting everything else...\n");
+ prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n");
+
+ for_each_cpu(cpu) {
+ if (cpu->primary == cpu)
+ clr_special_wakeup(cpu);
+ }
+
+ prlog(PR_INFO, "RESET: All done, cleaning up...\n");
/* Clear release flag for next time */
fast_boot_release = false;
@@ -322,6 +509,12 @@ void __noreturn fast_reboot(void)
/* Set our state to active */
this_cpu()->state = cpu_state_active;
+ /* We can now do NAP mode */
+ cpu_set_pm_enable(true);
+
+ /* Start preloading kernel and ramdisk */
+ start_preload_kernel();
+
/* Poke the consoles (see comments in the code there) */
fsp_console_reset();
@@ -331,15 +524,6 @@ void __noreturn fast_reboot(void)
/* Remove all PCI devices */
pci_reset();
- /* Reset IO Hubs */
- cec_reset();
-
- /* Re-Initialize all discovered PCI slots */
- pci_init_slots();
-
- /* Clear memory */
-#ifdef FAST_REBOOT_CLEARS_MEMORY
- memory_reset();
-#endif
+ /* Load and boot payload */
load_and_boot_kernel(true);
}
diff --git a/core/init.c b/core/init.c
index 12fd7a7..bc14da7 100644
--- a/core/init.c
+++ b/core/init.c
@@ -303,7 +303,7 @@ extern uint64_t boot_offset;
static size_t initramfs_size;
-static bool start_preload_kernel(void)
+bool start_preload_kernel(void)
{
int loaded;
@@ -434,6 +434,9 @@ static void load_initramfs(void)
{
int loaded;
+ dt_check_del_prop(dt_chosen, "linux,initrd-start");
+ dt_check_del_prop(dt_chosen, "linux,initrd-end");
+
loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE);
@@ -499,6 +502,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
occ_pstates_init();
/* Set kernel command line argument if specified */
+ dt_check_del_prop(dt_chosen, "bootargs");
cmdline = nvram_query("bootargs");
#ifdef KERNEL_COMMAND_LINE
if (!cmdline)
@@ -646,7 +650,7 @@ void setup_reset_vector(void)
*(dst++) = *(src++);
}
-static void copy_exception_vectors(void)
+void copy_exception_vectors(void)
{
/* Backup previous vectors as this could contain a kernel
* image.
diff --git a/core/lock.c b/core/lock.c
index 53cc337..e82048b 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -110,6 +110,9 @@ void unlock(struct lock *l)
this_cpu()->lock_depth--;
l->lock_val = 0;
+ /* WARNING: On fast reboot, we can be reset right at that
+ * point, so the reset_lock in there cannot be in the con path
+ */
if (l->in_con_path) {
cpu->con_suspend--;
if (cpu->con_suspend == 0 && cpu->con_need_flush)
diff --git a/core/pci.c b/core/pci.c
index 30307c9..6a1dabc 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -1456,6 +1456,7 @@ static void __pci_reset(struct list_head *list)
while ((pd = list_pop(list, struct pci_device, link)) != NULL) {
__pci_reset(&pd->children);
+ dt_free(pd->dn);
free(pd);
}
}
@@ -1472,10 +1473,17 @@ void pci_reset(void)
* state machine could be done in parallel)
*/
for (i = 0; i < ARRAY_SIZE(phbs); i++) {
- if (!phbs[i])
+ struct phb *phb = phbs[i];
+ if (!phb)
continue;
- __pci_reset(&phbs[i]->devices);
+ __pci_reset(&phb->devices);
+ if (phb->ops->ioda_reset)
+ phb->ops->ioda_reset(phb, true);
}
+
+ /* Re-Initialize all discovered PCI slots */
+ pci_init_slots();
+
}
static void pci_do_jobs(void (*fn)(void *))
diff --git a/core/platform.c b/core/platform.c
index 7672914..b37346e 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -24,6 +24,7 @@
#include <xscom.h>
#include <errorlog.h>
#include <bt.h>
+#include <nvram.h>
bool manufacturing_mode = false;
struct platform platform;
@@ -54,10 +55,10 @@ static int64_t opal_cec_reboot(void)
console_complete_flush();
-#ifdef ENABLE_FAST_RESET
- /* Try a fast reset first */
- fast_reset();
-#endif
+ /* Try a fast reset first, if enabled */
+ if (nvram_query_eq("experimental-fast-reset","feeling-lucky"))
+ fast_reboot();
+
if (platform.cec_reboot)
return platform.cec_reboot();
diff --git a/hw/fsp/fsp-console.c b/hw/fsp/fsp-console.c
index 44d24cc..0080d73 100644
--- a/hw/fsp/fsp-console.c
+++ b/hw/fsp/fsp-console.c
@@ -892,6 +892,9 @@ static void reopen_all_hvsi(void)
void fsp_console_reset(void)
{
+ if (!fsp_present())
+ return;
+
prlog(PR_NOTICE, "FSP: Console reset !\n");
/* This is called on a fast-reset. To work around issues with HVSI
@@ -1001,6 +1004,8 @@ void fsp_console_select_stdout(void)
*/
}
}
+ dt_check_del_prop(dt_chosen, "linux,stdout-path");
+
if (fsp_serials[1].open && use_serial) {
dt_add_property_string(dt_chosen, "linux,stdout-path",
"/ibm,opal/consoles/serial@1");
diff --git a/hw/fsp/fsp-leds.c b/hw/fsp/fsp-leds.c
index 50e82b5..b5a32ad 100644
--- a/hw/fsp/fsp-leds.c
+++ b/hw/fsp/fsp-leds.c
@@ -1570,6 +1570,9 @@ void create_led_device_nodes(void)
if (!pled)
return;
+ /* Check if already populated (fast-reboot) */
+ if (dt_has_node_property(pled, "compatible", NULL))
+ return;
dt_add_property_strings(pled, "compatible", DT_PROPERTY_LED_COMPATIBLE);
led_mode = dt_prop_get(pled, DT_PROPERTY_LED_MODE);
diff --git a/hw/occ.c b/hw/occ.c
index 63e142c..d5c590b 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -517,10 +517,14 @@ void occ_pstates_init(void)
struct proc_chip *chip;
struct cpu_thread *c;
s8 pstate_nom;
+ static bool occ_pstates_initialized;
/* OCC is P8 only */
if (proc_gen != proc_gen_p8)
return;
+ /* Handle fast reboots */
+ if (occ_pstates_initialized)
+ return;
chip = next_chip(NULL);
if (!chip->homer_base) {
@@ -558,6 +562,7 @@ void occ_pstates_init(void)
for_each_chip(chip)
chip->throttle = 0;
opal_add_poller(occ_throttle_poll, NULL);
+ occ_pstates_initialized = true;
}
struct occ_load_req {
diff --git a/hw/psi.c b/hw/psi.c
index 900886a..17e83a5 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -378,6 +378,36 @@ static uint64_t psi_p7_irq_attributes(struct irq_source *is __unused,
return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_FREQUENT;
}
+static const uint32_t psi_p8_irq_to_xivr[P8_IRQ_PSI_IRQ_COUNT] = {
+ [P8_IRQ_PSI_FSP] = PSIHB_XIVR_FSP,
+ [P8_IRQ_PSI_OCC] = PSIHB_XIVR_OCC,
+ [P8_IRQ_PSI_FSI] = PSIHB_XIVR_FSI,
+ [P8_IRQ_PSI_LPC] = PSIHB_XIVR_LPC,
+ [P8_IRQ_PSI_LOCAL_ERR] = PSIHB_XIVR_LOCAL_ERR,
+ [P8_IRQ_PSI_EXTERNAL]= PSIHB_XIVR_HOST_ERR,
+};
+
+static void psi_cleanup_irq(struct psi *psi)
+{
+ uint32_t irq;
+ uint64_t xivr, xivr_p;
+
+ for (irq = 0; irq < P8_IRQ_PSI_IRQ_COUNT; irq++) {
+ prlog(PR_DEBUG, "PSI[0x%03x]: Cleaning up IRQ %d\n",
+ psi->chip_id, irq);
+
+ xivr_p = psi_p8_irq_to_xivr[irq];
+ xivr = in_be64(psi->regs + xivr_p);
+ xivr |= (0xffull << 32);
+ out_be64(psi->regs + xivr_p, xivr);
+ time_wait_ms_nopoll(10);
+ xivr = in_be64(psi->regs + xivr_p);
+ if (xivr & PPC_BIT(39)) {
+ printf(" Need EOI !\n");
+ icp_send_eoi(psi->interrupt + irq);
+ }
+ }
+}
/* Called on a fast reset, make sure we aren't stuck with
* an accepted and never EOId PSI interrupt
@@ -385,27 +415,13 @@ static uint64_t psi_p7_irq_attributes(struct irq_source *is __unused,
void psi_irq_reset(void)
{
struct psi *psi;
- uint64_t xivr;
printf("PSI: Hot reset!\n");
- assert(proc_gen == proc_gen_p7);
+ assert(proc_gen == proc_gen_p8);
list_for_each(&psis, psi, list) {
- /* Mask the interrupt & clean the XIVR */
- xivr = 0x000000ff00000000UL;
- xivr |= P7_IRQ_BUID(psi->interrupt) << 16;
- out_be64(psi->regs + PSIHB_XIVR, xivr);
-
-#if 0 /* Seems to checkstop ... */
- /*
- * Maybe not anymore; we were just blindly sending
- * this on all iopaths, not just the active one;
- * We don't even know if those psis are even correct.
- */
- /* Send a dummy EOI to make sure the ICP is clear */
- icp_send_eoi(psi->interrupt);
-#endif
+ psi_cleanup_irq(psi);
}
}
@@ -416,34 +432,17 @@ static const struct irq_source_ops psi_p7_irq_ops = {
.attributes = psi_p7_irq_attributes,
};
+
static int64_t psi_p8_set_xive(struct irq_source *is, uint32_t isn,
uint16_t server, uint8_t priority)
{
struct psi *psi = is->data;
uint64_t xivr_p, xivr;
+ uint32_t irq_idx = isn & 7;
- switch(isn & 7) {
- case P8_IRQ_PSI_FSP:
- xivr_p = PSIHB_XIVR_FSP;
- break;
- case P8_IRQ_PSI_OCC:
- xivr_p = PSIHB_XIVR_OCC;
- break;
- case P8_IRQ_PSI_FSI:
- xivr_p = PSIHB_XIVR_FSI;
- break;
- case P8_IRQ_PSI_LPC:
- xivr_p = PSIHB_XIVR_LPC;
- break;
- case P8_IRQ_PSI_LOCAL_ERR:
- xivr_p = PSIHB_XIVR_LOCAL_ERR;
- break;
- case P8_IRQ_PSI_EXTERNAL:
- xivr_p = PSIHB_XIVR_HOST_ERR;
- break;
- default:
- return OPAL_PARAMETER;
- }
+ if (irq_idx >= P8_IRQ_PSI_IRQ_COUNT)
+ return OPAL_PARAMETER;
+ xivr_p = psi_p8_irq_to_xivr[irq_idx];
/* Populate the XIVR */
xivr = (uint64_t)server << 40;
@@ -460,29 +459,12 @@ static int64_t psi_p8_get_xive(struct irq_source *is, uint32_t isn __unused,
{
struct psi *psi = is->data;
uint64_t xivr_p, xivr;
+ uint32_t irq_idx = isn & 7;
- switch(isn & 7) {
- case P8_IRQ_PSI_FSP:
- xivr_p = PSIHB_XIVR_FSP;
- break;
- case P8_IRQ_PSI_OCC:
- xivr_p = PSIHB_XIVR_OCC;
- break;
- case P8_IRQ_PSI_FSI:
- xivr_p = PSIHB_XIVR_FSI;
- break;
- case P8_IRQ_PSI_LPC:
- xivr_p = PSIHB_XIVR_LPC;
- break;
- case P8_IRQ_PSI_LOCAL_ERR:
- xivr_p = PSIHB_XIVR_LOCAL_ERR;
- break;
- case P8_IRQ_PSI_EXTERNAL:
- xivr_p = PSIHB_XIVR_HOST_ERR;
- break;
- default:
- return OPAL_PARAMETER;
- }
+ if (irq_idx >= P8_IRQ_PSI_IRQ_COUNT)
+ return OPAL_PARAMETER;
+
+ xivr_p = psi_p8_irq_to_xivr[irq_idx];
/* Read & decode the XIVR */
xivr = in_be64(psi->regs + xivr_p);
@@ -1053,3 +1035,4 @@ void psi_init(void)
psi_init_psihb(np);
}
+
diff --git a/hw/slw.c b/hw/slw.c
index b916069..b4fb6ec 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -1060,6 +1060,8 @@ static void fast_sleep_enter(void)
}
primary_thread->save_l2_fir_action1 = tmp;
+ primary_thread->in_fast_sleep = true;
+
tmp = tmp & ~0x0200000000000000ULL;
rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1),
tmp);
@@ -1082,7 +1084,7 @@ static void fast_sleep_enter(void)
/* Workarounds while exiting fast-sleep */
-static void fast_sleep_exit(void)
+void fast_sleep_exit(void)
{
uint32_t core = pir_to_core_id(this_cpu()->pir);
uint32_t chip_id = this_cpu()->chip_id;
@@ -1090,6 +1092,7 @@ static void fast_sleep_exit(void)
int rc;
primary_thread = this_cpu()->primary;
+ primary_thread->in_fast_sleep = false;
rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1),
primary_thread->save_l2_fir_action1);
@@ -1131,7 +1134,7 @@ static int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t enter)
opal_call(OPAL_CONFIG_CPU_IDLE_STATE, opal_config_cpu_idle_state, 2);
#ifdef __HAVE_LIBPORE__
-static int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val)
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val)
{
struct cpu_thread *c = find_cpu_by_pir(cpu_pir);
diff --git a/include/config.h b/include/config.h
index 2524570..f6572b0 100644
--- a/include/config.h
+++ b/include/config.h
@@ -72,9 +72,6 @@
*/
//#define FORCE_DUMMY_CONSOLE 1
-/* Enable this to do fast resets. Currently unreliable... */
-//#define ENABLE_FAST_RESET 1
-
/* Enable this to make fast reboot clear memory */
//#define FAST_REBOOT_CLEARS_MEMORY 1
diff --git a/include/cpu.h b/include/cpu.h
index 341e73d..f649a13 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -66,6 +66,7 @@ struct cpu_thread {
bool in_mcount;
bool in_poller;
bool in_reinit;
+ bool in_fast_sleep;
bool in_sleep;
bool in_idle;
uint32_t hbrt_spec_wakeup; /* primary only */
diff --git a/include/processor.h b/include/processor.h
index 4b11702..3942268 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -164,6 +164,7 @@
/* Bits in HID0 */
#define SPR_HID0_POWER8_4LPARMODE PPC_BIT(2)
#define SPR_HID0_POWER8_2LPARMODE PPC_BIT(6)
+#define SPR_HID0_POWER8_DYNLPARDIS PPC_BIT(15)
#define SPR_HID0_POWER8_HILE PPC_BIT(19)
#define SPR_HID0_POWER9_HILE PPC_BIT(4)
#define SPR_HID0_POWER8_ENABLE_ATTN PPC_BIT(31)
diff --git a/include/skiboot.h b/include/skiboot.h
index 2a9f5e2..2ef7677 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -190,12 +190,14 @@ extern unsigned long get_symbol(unsigned long addr,
char **sym, char **sym_end);
/* Fast reboot support */
-extern void fast_reset(void);
+extern void fast_reboot(void);
extern void __noreturn __secondary_cpu_entry(void);
extern void __noreturn load_and_boot_kernel(bool is_reboot);
extern void cleanup_tlb(void);
extern void init_shared_sprs(void);
extern void init_replicated_sprs(void);
+extern bool start_preload_kernel(void);
+extern void copy_exception_vectors(void);
extern void setup_reset_vector(void);
/* Various probe routines, to replace with an initcall system */
@@ -269,6 +271,11 @@ extern void slw_update_timer_expiry(uint64_t new_target);
/* Is SLW timer available ? */
extern bool slw_timer_ok(void);
+/* Patch SPR in SLW image */
+extern int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
+
+extern void fast_sleep_exit(void); /* hw/slw.c: workarounds while exiting fast-sleep */
+
/* Fallback fake RTC */
extern void fake_rtc_init(void);
diff --git a/platforms/mambo/mambo.c b/platforms/mambo/mambo.c
index 64248ef..bd151ed 100644
--- a/platforms/mambo/mambo.c
+++ b/platforms/mambo/mambo.c
@@ -92,6 +92,8 @@ static inline int callthru3(int command, unsigned long arg1, unsigned long arg2,
#define BOGUS_DISK_WRITE 117
#define BOGUS_DISK_INFO 118
+#define CALL_TCL 86 /* callthru command number passed by callthru_tcl() */
+
static inline int callthru_disk_read(int id, void *buf, unsigned long sect,
unsigned long nrsect)
{
@@ -112,6 +114,14 @@ static inline unsigned long callthru_disk_info(int op, int id)
(unsigned long)id);
}
+extern unsigned long callthru_tcl(const char *str, int len); /* prototype for the non-static definition below */
+/* Log str at PR_DEBUG, then issue callthru2(CALL_TCL, str, len) and return its result unchanged. */
+unsigned long callthru_tcl(const char *str, int len)
+{
+ prlog(PR_DEBUG, "Sending TCL to Mambo, cmd: %s\n", str); /* NOTE(review): %s does not use len; presumably str is NUL-terminated -- confirm */
+ return callthru2(CALL_TCL, (unsigned long)str, (unsigned long)len);
+}
+
struct bogus_disk_info {
unsigned long size;
int id;