diff options
Diffstat (limited to 'core/fast-reboot.c')
-rw-r--r-- | core/fast-reboot.c | 538 |
1 file changed, 361 insertions(+), 177 deletions(-)
diff --git a/core/fast-reboot.c b/core/fast-reboot.c index 30b77e9..cf8b3d4 100644 --- a/core/fast-reboot.c +++ b/core/fast-reboot.c @@ -25,242 +25,420 @@ #include <timebase.h> #include <pci.h> #include <chip.h> +#include <chiptod.h> + +#define P8_EX_TCTL_DIRECT_CONTROLS(t) (0x10013000 + (t) * 0x10) +#define P8_DIRECT_CTL_STOP PPC_BIT(63) +#define P8_DIRECT_CTL_PRENAP PPC_BIT(47) +#define P8_DIRECT_CTL_SRESET PPC_BIT(60) -/* - * To get control of all threads, we sreset them via XSCOM after - * patching the 0x100 vector. This will work as long as the target - * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider - * a more messy approach. - * - * The SCOM register we want is called "Core RAS Control" in the doc - * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list - * - * Bits in there change from CPU rev to CPU rev but the bit we care - * about, bit 60 "sreset_request" appears to have stuck to the same - * place in both P7 and P7+. The register also has the same SCOM - * address - */ -#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400 -#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440 -#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480 -#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0 -#define TCTL_DC_SRESET_REQUEST PPC_BIT(60) /* Flag tested by the OPAL entry code */ uint8_t reboot_in_progress; -static struct cpu_thread *resettor, *resettee; +static volatile bool fast_boot_release; +static struct cpu_thread *last_man_standing; +static struct lock reset_lock = LOCK_UNLOCKED; -static void flush_caches(void) +static int set_special_wakeup(struct cpu_thread *cpu) { - uint64_t base = SKIBOOT_BASE; - uint64_t end = base + SKIBOOT_SIZE; + uint64_t val, poll_target, stamp; + uint32_t core_id; + int rc; + + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ - /* Not sure what the effect of sreset is on cores, so let's - * shoot a series of dcbf's on all cachelines that make up - * our core 
memory just in case... + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); + + prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; + + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary */ - while(base < end) { - asm volatile("dcbf 0,%0" : : "r" (base) : "memory"); - base += 128; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), + PPC_BIT(0)); + if (rc) { + prerror("RESET: XSCOM error %d asserting special" + " wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - sync(); + + /* + * HWP uses the history for Perf register here, dunno why it uses + * that one instead of the pHyp one, maybe to avoid clobbering it... + * + * In any case, it does that to check for run/nap vs.sleep/winkle/other + * to decide whether to poll on checkstop or not. Since we don't deal + * with checkstop conditions here, we ignore that part. + */ + + /* + * Now poll for completion of special wakeup. The HWP is nasty here, + * it will poll at 5ms intervals for up to 200ms. This is not quite + * acceptable for us at runtime, at least not until we have the + * ability to "context switch" HBRT. In practice, because we don't + * winkle, it will never take that long, so we increase the polling + * frequency to 1us per poll. However we do have to keep the same + * timeout. + * + * We don't use time_wait_ms() either for now as we don't want to + * poll the FSP here. 
+ */ + stamp = mftb(); + poll_target = stamp + msecs_to_tb(200); + val = 0; + while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) { + /* Wait 1 us */ + time_wait_us(1); + + /* Read PM state */ + rc = xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0), + &val); + if (rc) { + prerror("RESET: XSCOM error %d reading PM state on" + " 0x%x\n", rc, cpu->pir); + return rc; + } + /* Check timeout */ + if (mftb() > poll_target) + break; + } + + /* Success ? */ + if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) { + uint64_t now = mftb(); + prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n", + tb_to_usecs(now - stamp)); + return 0; + } + + /* + * We timed out ... + * + * HWP has a complex workaround for HW255321 which affects + * Murano DD1 and Venice DD1. Ignore that for now + * + * Instead we just dump some XSCOMs for error logging + */ + prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir); + prerror("RESET: PM0 = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + prerror("RESET: SPC_WKUP = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_IDLE_STATE_HISTORY_PHYP), + &val); + prerror("RESET: HISTORY = 0x%016llx\n", val); + + return OPAL_HARDWARE; } -static bool do_reset_core_p7(struct cpu_thread *cpu) +static int clr_special_wakeup(struct cpu_thread *cpu) { - uint32_t xscom_addr, chip; - uint64_t ctl; + uint64_t val; + uint32_t core_id; int rc; - /* Add the Core# */ - xscom_addr = EX0_TCTL_DIRECT_CONTROLS0; - xscom_addr |= ((cpu->pir >> 2) & 7) << 24; + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ + + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); - chip = pir_to_chip_id(cpu->pir); + prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; - ctl = 
TCTL_DC_SRESET_REQUEST; - rc = xscom_write(chip, xscom_addr, ctl); - rc |= xscom_write(chip, xscom_addr + 0x40, ctl); - rc |= xscom_write(chip, xscom_addr + 0x80, ctl); - rc |= xscom_write(chip, xscom_addr + 0xc0, ctl); + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), 0); if (rc) { - prerror("RESET: Error %d resetting CPU 0x%04x\n", - rc, cpu->pir); - return false; + prerror("RESET: XSCOM error %d deasserting" + " special wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - return true; + + /* + * The original HWp reads the XSCOM again with the comment + * "This puts an inherent delay in the propagation of the reset + * transition" + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + return 0; } -static void fast_reset_p7(void) +extern unsigned long callthru_tcl(const char *str, int len); + +static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits) +{ + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t xscom_addr; + char tcl_cmd[50]; + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + if (bits != P8_DIRECT_CTL_SRESET) + return; + snprintf(tcl_cmd, sizeof(tcl_cmd), "mysim cpu %i:%i set spr pc 0x100", core_id, thread_id); + callthru_tcl(tcl_cmd, strlen(tcl_cmd)); + return; + } + + xscom_addr = XSCOM_ADDR_P8_EX(core_id, + P8_EX_TCTL_DIRECT_CONTROLS(thread_id)); + + xscom_write(chip_id, xscom_addr, bits); +} + +static bool fast_reset_p8(void) { struct cpu_thread *cpu; - resettee = this_cpu(); - resettor = NULL; + /* Mark ourselves as last man standing in need of a reset */ + 
last_man_standing = this_cpu(); - /* Pick up a candidate resettor. We do that before we flush - * the caches - */ + prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n", + this_cpu()->pir, pir_to_core_id(this_cpu()->pir)); + + /* Assert special wakup on all cores */ for_each_cpu(cpu) { - /* - * Some threads might still be in skiboot. - * - * But because we deal with entire cores and we don't want - * to special case things, we are just going to reset them - * too making the assumption that this is safe, they are - * holding no locks. This can only be true if they don't - * have jobs scheduled which is hopefully the case. - */ - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; + if (cpu->primary == cpu) + if (set_special_wakeup(cpu) != OPAL_SUCCESS) + return false; + } - /* - * Only hit cores and only if they aren't on the same core - * as ourselves - */ - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; + prlog(PR_DEBUG, "RESET: Stopping the world...\n"); - /* Pick up one of those guys as our "resettor". It will be - * in charge of resetting this CPU. We avoid resetting - * ourselves, not sure how well it would do with SCOM - */ - resettor = cpu; - break; - } + /* Put everybody in stop except myself */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_STOP); - if (!resettor) { - printf("RESET: Can't find a resettor !\n"); - return; + /* Also make sure that saved_r1 is 0 ! That's what will + * make our reset vector jump to fast_reboot_entry + */ + cpu->save_r1 = 0; } - printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n", - this_cpu()->pir, resettor->pir); - printf("RESET: Flushing caches...\n"); + /* Restore skiboot vectors */ + copy_exception_vectors(); + setup_reset_vector(); - /* Is that necessary ? 
*/ - flush_caches(); + prlog(PR_DEBUG, "RESET: Pre-napping all threads but one...\n"); - /* Reset everybody except self and except resettor */ + /* Put everybody in pre-nap except myself */ for_each_cpu(cpu) { - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor)) - continue; + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP); + } - printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir); + prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n"); - if (!do_reset_core_p7(cpu)) - return; + /* Reset everybody except my own core threads */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET); } - /* Reset the resettor last because it's going to kill me ! */ - printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir); - if (!do_reset_core_p7(resettor)) - return; - - /* Don't return */ - for (;;) - ; + return true; } -void fast_reset(void) +void fast_reboot(void) { - uint32_t pvr = mfspr(SPR_PVR); - extern uint32_t fast_reset_patch_start; - extern uint32_t fast_reset_patch_end; - uint32_t *dst, *src; + bool success; + + if (proc_gen != proc_gen_p8) { + prlog(PR_DEBUG, + "RESET: Fast reboot not available on this CPU\n"); + return; + } + if (chip_quirk(QUIRK_NO_DIRECT_CTL)) { + prlog(PR_DEBUG, + "RESET: Fast reboot disabled by quirk\n"); + return; + } - printf("RESET: Fast reboot request !\n"); + prlog(PR_INFO, "RESET: Initiating fast reboot...\n"); /* XXX We need a way to ensure that no other CPU is in skiboot * holding locks (via the OPAL APIs) and if they are, we need - * for them to get out + * for them to get out. Hopefully that isn't happening, but... + * + * To fix this properly, we want to keep track of OPAL entry/exit + * on all CPUs. 
*/ reboot_in_progress = 1; time_wait_ms(200); - /* Copy reset trampoline */ - printf("RESET: Copying reset trampoline...\n"); - src = &fast_reset_patch_start; - dst = (uint32_t *)0x100; - while(src < &fast_reset_patch_end) - *(dst++) = *(src++); - sync_icache(); - - switch(PVR_TYPE(pvr)) { - case PVR_TYPE_P7: - case PVR_TYPE_P7P: - fast_reset_p7(); + /* Lock so the new guys coming don't reset us */ + lock(&reset_lock); + + fast_boot_release = false; + + success = fast_reset_p8(); + + /* Unlock, at this point we go away */ + unlock(&reset_lock); + + if (success) { + if (!next_cpu(first_cpu())) + /* Only 1 CPU, so fake reset ourselves */ + asm volatile("ba 0x100 " : : : ); + /* Don't return */ + for (;;) + ; } } static void cleanup_cpu_state(void) { - if (cpu_is_thread0(this_cpu())) { - cleanup_tlb(); + struct cpu_thread *cpu = this_cpu(); + + cpu->current_hile = false; + + /* Per core cleanup */ + if (cpu_is_thread0(cpu)) { + /* Shared SPRs whacked back to normal */ + + /* XXX Update the SLW copies ! Also dbl check HIDs etc... */ init_shared_sprs(); + + /* If somebody was in fast_sleep, we may have a workaround + * to undo + */ + if (cpu->in_fast_sleep) { + prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep" + " undoing workarounds...\n", cpu->pir); + fast_sleep_exit(); + } + + /* And we might have lost TB sync */ + chiptod_wakeup_resync(); + + /* The TLB surely contains garbage */ + cleanup_tlb(); } + + /* Per-thread additional cleanup */ init_replicated_sprs(); - reset_cpu_icp(); + + // XXX Cleanup SLW, check HIDs ... 
} -#ifdef FAST_REBOOT_CLEARS_MEMORY -static void fast_mem_clear(uint64_t start, uint64_t end) +void __noreturn enter_nap(void); + +static void check_split_core(void) { - printf("MEMORY: Clearing %llx..%llx\n", start, end); + struct cpu_thread *cpu; + u64 mask, hid0; + + hid0 = mfspr(SPR_HID0); + mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE; - while(start < end) { - asm volatile("dcbz 0,%0" : : "r" (start) : "memory"); - start += 128; + if ((hid0 & mask) == 0) + return; + + prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir); + + /* If it's a secondary thread, just send it to nap */ + if (this_cpu()->pir & 7) { + /* Prepare to be woken up */ + icp_prep_for_pm(); + /* Setup LPCR to wakeup on external interrupts only */ + mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) | + SPR_LPCR_P8_PECE2)); + /* Go to nap (doesn't return) */ + enter_nap(); } -} -static void memory_reset(void) -{ - struct address_range *i; - uint64_t skistart = SKIBOOT_BASE; - uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE; - - printf("MEMORY: Clearing ...\n"); - - list_for_each(&address_ranges, i, list) { - uint64_t start = cleanup_addr(i->arange->start); - uint64_t end = cleanup_addr(i->arange->end); - - if (start >= skiend || end <= skistart) - fast_mem_clear(start, end); - else { - if (start < skistart) - fast_mem_clear(start, skistart); - if (end > skiend) - fast_mem_clear(skiend, end); - } + prlog(PR_INFO, "RESET: Primary, unsplitting... \n"); + + /* Trigger unsplit operation and update SLW image */ + hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS; + set_hid0(hid0); + opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0); + + /* Wait for unsplit */ + while (mfspr(SPR_HID0) & mask) + cpu_relax(); + + /* Now the guys are sleeping, wake'em up. They will come back + * via reset and continue the fast reboot process normally. + * No need to wait. + */ + prlog(PR_INFO, "RESET: Waking unsplit secondaries... 
\n"); + + for_each_cpu(cpu) { + if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu())) + continue; + icp_kick_cpu(cpu); } } -#endif /* FAST_REBOOT_CLEARS_MEMORY */ + /* Entry from asm after a fast reset */ -void __noreturn fast_reboot(void); +void __noreturn fast_reboot_entry(void); -void __noreturn fast_reboot(void) +void __noreturn fast_reboot_entry(void) { - static volatile bool fast_boot_release; struct cpu_thread *cpu; - printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir); + prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir); + time_wait_ms(100); - /* If this CPU was chosen as the resettor, it must reset the - * resettee (the one that initiated the whole process + lock(&reset_lock); + if (last_man_standing && next_cpu(first_cpu())) { + prlog(PR_DEBUG, "RESET: last man standing fixup...\n"); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET); + } + last_man_standing = NULL; + unlock(&reset_lock); + + /* We reset our ICP first ! Otherwise we might get stray interrupts + * when unsplitting + */ + reset_cpu_icp(); + + /* If we are split, we need to unsplit. Since that can send us + * to NAP, which will come back via reset, we do it now */ - if (this_cpu() == resettor) - do_reset_core_p7(resettee); + check_split_core(); /* Are we the original boot CPU ? If not, we spin waiting * for a relase signal from CPU 1, then we clean ourselves @@ -277,8 +455,10 @@ void __noreturn fast_reboot(void) __secondary_cpu_entry(); } + prlog(PR_INFO, "RESET: Boot CPU waiting for everybody...\n"); + /* We are the original boot CPU, wait for secondaries to - * be captured + * be captured. 
*/ for_each_cpu(cpu) { if (cpu == this_cpu()) @@ -292,7 +472,7 @@ void __noreturn fast_reboot(void) smt_medium(); } - printf("INIT: Releasing secondaries...\n"); + prlog(PR_INFO, "RESET: Releasing secondaries...\n"); /* Release everybody */ fast_boot_release = true; @@ -310,7 +490,14 @@ void __noreturn fast_reboot(void) } } - printf("INIT: All done, resetting everything else...\n"); + prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n"); + + for_each_cpu(cpu) { + if (cpu->primary == cpu) + clr_special_wakeup(cpu); + } + + prlog(PR_INFO, "RESET: All done, cleaning up...\n"); /* Clear release flag for next time */ fast_boot_release = false; @@ -322,6 +509,12 @@ void __noreturn fast_reboot(void) /* Set our state to active */ this_cpu()->state = cpu_state_active; + /* We can now do NAP mode */ + cpu_set_pm_enable(true); + + /* Start preloading kernel and ramdisk */ + start_preload_kernel(); + /* Poke the consoles (see comments in the code there) */ fsp_console_reset(); @@ -331,15 +524,6 @@ void __noreturn fast_reboot(void) /* Remove all PCI devices */ pci_reset(); - /* Reset IO Hubs */ - cec_reset(); - - /* Re-Initialize all discovered PCI slots */ - pci_init_slots(); - - /* Clear memory */ -#ifdef FAST_REBOOT_CLEARS_MEMORY - memory_reset(); -#endif + /* Load and boot payload */ load_and_boot_kernel(true); } |