diffstat
 -rw-r--r-- | asm/head.S              | 122
 -rw-r--r-- | core/fast-reboot.c      | 538
 -rw-r--r-- | core/init.c             |   8
 -rw-r--r-- | core/lock.c             |   3
 -rw-r--r-- | core/pci.c              |  12
 -rw-r--r-- | core/platform.c         |   9
 -rw-r--r-- | hw/fsp/fsp-console.c    |   5
 -rw-r--r-- | hw/fsp/fsp-leds.c       |   3
 -rw-r--r-- | hw/occ.c                |   5
 -rw-r--r-- | hw/psi.c                | 103
 -rw-r--r-- | hw/slw.c                |   7
 -rw-r--r-- | include/config.h        |   3
 -rw-r--r-- | include/cpu.h           |   1
 -rw-r--r-- | include/processor.h     |   1
 -rw-r--r-- | include/skiboot.h       |   9
 -rw-r--r-- | platforms/mambo/mambo.c |  10
 16 files changed, 527 insertions(+), 312 deletions(-)
@@ -458,64 +458,6 @@ call_relocate: 1: /* Fatal relocate failure */ attn -/* This is a little piece of code that is copied down to - * 0x100 when doing a "fast reset" - */ -.global fast_reset_patch_start -fast_reset_patch_start: - smt_medium - LOAD_IMM64(%r30, SKIBOOT_BASE) - LOAD_IMM32(%r3, fast_reset_entry - __head) - add %r3,%r30,%r3 - mtctr %r3 - bctr -.global fast_reset_patch_end -fast_reset_patch_end: - -/* Fast reset code. We clean up the TLB and a few SPRs and - * return to C code. All CPUs do that, the CPU triggering the - * reset does it to itself last. The C code will sort out who - * the master is. We come from the trampoline above with - * r30 containing SKIBOOT_BASE - */ -fast_reset_entry: - /* Clear out SLB */ - li %r6,0 - slbmte %r6,%r6 - slbia - ptesync - - /* Get PIR */ - mfspr %r31,SPR_PIR - - /* Get a stack and restore r13 */ - GET_STACK(%r1,%r31) - li %r3,0 - std %r3,0(%r1) - std %r3,8(%r1) - std %r3,16(%r1) - GET_CPU() - - /* Get our TOC */ - addis %r2,%r30,(__toc_start - __head)@ha - addi %r2,%r2,(__toc_start - __head)@l - - /* Go to C ! */ - bl fast_reboot - b . - -.global cleanup_tlb -cleanup_tlb: - /* Clean the TLB */ - li %r3,128 - mtctr %r3 - li %r4,0x800 /* IS field = 0b10 */ - ptesync -1: tlbiel %r4 - addi %r4,%r4,0x1000 - bdnz 1b - ptesync - #define FIXUP_ENDIAN \ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ b $+36; /* Skip trampoline if endian is good */ \ @@ -624,7 +566,12 @@ reset_wakeup: GET_CPU() /* Restore original stack pointer */ - ld %r1,CPUTHREAD_SAVE_R1(%r13) + ld %r3,CPUTHREAD_SAVE_R1(%r13) + + /* If it's 0, we are doing a fast reboot */ + cmpldi %r3,0 + beq fast_reset_entry + mr %r1,%r3 /* Restore more stuff */ lwz %r3,STACK_CR(%r1) @@ -661,6 +608,46 @@ reset_wakeup: mtlr %r0 blr +/* Fast reset code. We clean up the TLB and a few SPRs and + * return to C code. All CPUs do that, the CPU triggering the + * reset does it to itself last. The C code will sort out who + * the master is. 
We come from the trampoline above with + * r30 containing SKIBOOT_BASE + */ +fast_reset_entry: + /* Clear out SLB */ + li %r6,0 + slbmte %r6,%r6 + slbia + ptesync + + /* Dummy stack frame */ + li %r3,0 + std %r3,0(%r1) + std %r3,8(%r1) + std %r3,16(%r1) + + /* Get our TOC */ + addis %r2,%r30,(__toc_start - __head)@ha + addi %r2,%r2,(__toc_start - __head)@l + + /* Go to C ! */ + bl fast_reboot_entry + b . + +.global cleanup_tlb +cleanup_tlb: + /* Clean the TLB */ + li %r3,512 + mtctr %r3 + li %r4,0xc00 /* IS field = 0b11 */ + ptesync +1: tlbiel %r4 + addi %r4,%r4,0x1000 + bdnz 1b + ptesync + blr + /* Functions to initialize replicated and shared SPRs to sane * values. This is called at boot and on soft-reset */ @@ -708,10 +695,14 @@ init_shared_sprs: mtspr SPR_LPCR,%r3 sync isync - /* HID0: Clear bit 13 (enable core recovery) */ + /* HID0: Clear bit 13 (enable core recovery) + * Clear bit 19 (HILE) + */ mfspr %r3,SPR_HID0 li %r0,1 - sldi %r0,%r0,(63-13) + sldi %r4,%r0,(63-13) + sldi %r5,%r0,(63-19) + or %r0,%r4,%r5, andc %r3,%r3,%r0 sync mtspr SPR_HID0,%r3 @@ -743,6 +734,15 @@ init_replicated_sprs: /* XXX TODO: Add more */ blr + .global enter_nap +enter_nap: + std %r0,0(%r1) + ptesync + ld %r0,0(%r1) +1: cmp %cr0,%r0,%r0 + bne 1b + nap + b . /* * * NACA structure, accessed by the FPS to find the SPIRA diff --git a/core/fast-reboot.c b/core/fast-reboot.c index 30b77e9..cf8b3d4 100644 --- a/core/fast-reboot.c +++ b/core/fast-reboot.c @@ -25,242 +25,420 @@ #include <timebase.h> #include <pci.h> #include <chip.h> +#include <chiptod.h> + +#define P8_EX_TCTL_DIRECT_CONTROLS(t) (0x10013000 + (t) * 0x10) +#define P8_DIRECT_CTL_STOP PPC_BIT(63) +#define P8_DIRECT_CTL_PRENAP PPC_BIT(47) +#define P8_DIRECT_CTL_SRESET PPC_BIT(60) -/* - * To get control of all threads, we sreset them via XSCOM after - * patching the 0x100 vector. This will work as long as the target - * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider - * a more messy approach. 
- * - * The SCOM register we want is called "Core RAS Control" in the doc - * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list - * - * Bits in there change from CPU rev to CPU rev but the bit we care - * about, bit 60 "sreset_request" appears to have stuck to the same - * place in both P7 and P7+. The register also has the same SCOM - * address - */ -#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400 -#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440 -#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480 -#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0 -#define TCTL_DC_SRESET_REQUEST PPC_BIT(60) /* Flag tested by the OPAL entry code */ uint8_t reboot_in_progress; -static struct cpu_thread *resettor, *resettee; +static volatile bool fast_boot_release; +static struct cpu_thread *last_man_standing; +static struct lock reset_lock = LOCK_UNLOCKED; -static void flush_caches(void) +static int set_special_wakeup(struct cpu_thread *cpu) { - uint64_t base = SKIBOOT_BASE; - uint64_t end = base + SKIBOOT_SIZE; + uint64_t val, poll_target, stamp; + uint32_t core_id; + int rc; + + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ - /* Not sure what the effect of sreset is on cores, so let's - * shoot a series of dcbf's on all cachelines that make up - * our core memory just in case... 
+ /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); + + prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; + + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary */ - while(base < end) { - asm volatile("dcbf 0,%0" : : "r" (base) : "memory"); - base += 128; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), + PPC_BIT(0)); + if (rc) { + prerror("RESET: XSCOM error %d asserting special" + " wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - sync(); + + /* + * HWP uses the history for Perf register here, dunno why it uses + * that one instead of the pHyp one, maybe to avoid clobbering it... + * + * In any case, it does that to check for run/nap vs.sleep/winkle/other + * to decide whether to poll on checkstop or not. Since we don't deal + * with checkstop conditions here, we ignore that part. + */ + + /* + * Now poll for completion of special wakeup. The HWP is nasty here, + * it will poll at 5ms intervals for up to 200ms. This is not quite + * acceptable for us at runtime, at least not until we have the + * ability to "context switch" HBRT. In practice, because we don't + * winkle, it will never take that long, so we increase the polling + * frequency to 1us per poll. However we do have to keep the same + * timeout. + * + * We don't use time_wait_ms() either for now as we don't want to + * poll the FSP here. 
+ */ + stamp = mftb(); + poll_target = stamp + msecs_to_tb(200); + val = 0; + while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) { + /* Wait 1 us */ + time_wait_us(1); + + /* Read PM state */ + rc = xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0), + &val); + if (rc) { + prerror("RESET: XSCOM error %d reading PM state on" + " 0x%x\n", rc, cpu->pir); + return rc; + } + /* Check timeout */ + if (mftb() > poll_target) + break; + } + + /* Success ? */ + if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) { + uint64_t now = mftb(); + prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n", + tb_to_usecs(now - stamp)); + return 0; + } + + /* + * We timed out ... + * + * HWP has a complex workaround for HW255321 which affects + * Murano DD1 and Venice DD1. Ignore that for now + * + * Instead we just dump some XSCOMs for error logging + */ + prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir); + prerror("RESET: PM0 = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + prerror("RESET: SPC_WKUP = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_IDLE_STATE_HISTORY_PHYP), + &val); + prerror("RESET: HISTORY = 0x%016llx\n", val); + + return OPAL_HARDWARE; } -static bool do_reset_core_p7(struct cpu_thread *cpu) +static int clr_special_wakeup(struct cpu_thread *cpu) { - uint32_t xscom_addr, chip; - uint64_t ctl; + uint64_t val; + uint32_t core_id; int rc; - /* Add the Core# */ - xscom_addr = EX0_TCTL_DIRECT_CONTROLS0; - xscom_addr |= ((cpu->pir >> 2) & 7) << 24; + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ + + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); - chip = pir_to_chip_id(cpu->pir); + prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; - ctl = 
TCTL_DC_SRESET_REQUEST; - rc = xscom_write(chip, xscom_addr, ctl); - rc |= xscom_write(chip, xscom_addr + 0x40, ctl); - rc |= xscom_write(chip, xscom_addr + 0x80, ctl); - rc |= xscom_write(chip, xscom_addr + 0xc0, ctl); + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), 0); if (rc) { - prerror("RESET: Error %d resetting CPU 0x%04x\n", - rc, cpu->pir); - return false; + prerror("RESET: XSCOM error %d deasserting" + " special wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - return true; + + /* + * The original HWp reads the XSCOM again with the comment + * "This puts an inherent delay in the propagation of the reset + * transition" + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + return 0; } -static void fast_reset_p7(void) +extern unsigned long callthru_tcl(const char *str, int len); + +static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits) +{ + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t xscom_addr; + char tcl_cmd[50]; + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + if (bits != P8_DIRECT_CTL_SRESET) + return; + snprintf(tcl_cmd, sizeof(tcl_cmd), "mysim cpu %i:%i set spr pc 0x100", core_id, thread_id); + callthru_tcl(tcl_cmd, strlen(tcl_cmd)); + return; + } + + xscom_addr = XSCOM_ADDR_P8_EX(core_id, + P8_EX_TCTL_DIRECT_CONTROLS(thread_id)); + + xscom_write(chip_id, xscom_addr, bits); +} + +static bool fast_reset_p8(void) { struct cpu_thread *cpu; - resettee = this_cpu(); - resettor = NULL; + /* Mark ourselves as last man standing in need of a reset */ + 
last_man_standing = this_cpu(); - /* Pick up a candidate resettor. We do that before we flush - * the caches - */ + prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n", + this_cpu()->pir, pir_to_core_id(this_cpu()->pir)); + + /* Assert special wakup on all cores */ for_each_cpu(cpu) { - /* - * Some threads might still be in skiboot. - * - * But because we deal with entire cores and we don't want - * to special case things, we are just going to reset them - * too making the assumption that this is safe, they are - * holding no locks. This can only be true if they don't - * have jobs scheduled which is hopefully the case. - */ - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; + if (cpu->primary == cpu) + if (set_special_wakeup(cpu) != OPAL_SUCCESS) + return false; + } - /* - * Only hit cores and only if they aren't on the same core - * as ourselves - */ - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; + prlog(PR_DEBUG, "RESET: Stopping the world...\n"); - /* Pick up one of those guys as our "resettor". It will be - * in charge of resetting this CPU. We avoid resetting - * ourselves, not sure how well it would do with SCOM - */ - resettor = cpu; - break; - } + /* Put everybody in stop except myself */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_STOP); - if (!resettor) { - printf("RESET: Can't find a resettor !\n"); - return; + /* Also make sure that saved_r1 is 0 ! That's what will + * make our reset vector jump to fast_reboot_entry + */ + cpu->save_r1 = 0; } - printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n", - this_cpu()->pir, resettor->pir); - printf("RESET: Flushing caches...\n"); + /* Restore skiboot vectors */ + copy_exception_vectors(); + setup_reset_vector(); - /* Is that necessary ? 
*/ - flush_caches(); + prlog(PR_DEBUG, "RESET: Pre-napping all threads but one...\n"); - /* Reset everybody except self and except resettor */ + /* Put everybody in pre-nap except myself */ for_each_cpu(cpu) { - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor)) - continue; + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP); + } - printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir); + prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n"); - if (!do_reset_core_p7(cpu)) - return; + /* Reset everybody except my own core threads */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET); } - /* Reset the resettor last because it's going to kill me ! */ - printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir); - if (!do_reset_core_p7(resettor)) - return; - - /* Don't return */ - for (;;) - ; + return true; } -void fast_reset(void) +void fast_reboot(void) { - uint32_t pvr = mfspr(SPR_PVR); - extern uint32_t fast_reset_patch_start; - extern uint32_t fast_reset_patch_end; - uint32_t *dst, *src; + bool success; + + if (proc_gen != proc_gen_p8) { + prlog(PR_DEBUG, + "RESET: Fast reboot not available on this CPU\n"); + return; + } + if (chip_quirk(QUIRK_NO_DIRECT_CTL)) { + prlog(PR_DEBUG, + "RESET: Fast reboot disabled by quirk\n"); + return; + } - printf("RESET: Fast reboot request !\n"); + prlog(PR_INFO, "RESET: Initiating fast reboot...\n"); /* XXX We need a way to ensure that no other CPU is in skiboot * holding locks (via the OPAL APIs) and if they are, we need - * for them to get out + * for them to get out. Hopefully that isn't happening, but... + * + * To fix this properly, we want to keep track of OPAL entry/exit + * on all CPUs. 
*/ reboot_in_progress = 1; time_wait_ms(200); - /* Copy reset trampoline */ - printf("RESET: Copying reset trampoline...\n"); - src = &fast_reset_patch_start; - dst = (uint32_t *)0x100; - while(src < &fast_reset_patch_end) - *(dst++) = *(src++); - sync_icache(); - - switch(PVR_TYPE(pvr)) { - case PVR_TYPE_P7: - case PVR_TYPE_P7P: - fast_reset_p7(); + /* Lock so the new guys coming don't reset us */ + lock(&reset_lock); + + fast_boot_release = false; + + success = fast_reset_p8(); + + /* Unlock, at this point we go away */ + unlock(&reset_lock); + + if (success) { + if (!next_cpu(first_cpu())) + /* Only 1 CPU, so fake reset ourselves */ + asm volatile("ba 0x100 " : : : ); + /* Don't return */ + for (;;) + ; } } static void cleanup_cpu_state(void) { - if (cpu_is_thread0(this_cpu())) { - cleanup_tlb(); + struct cpu_thread *cpu = this_cpu(); + + cpu->current_hile = false; + + /* Per core cleanup */ + if (cpu_is_thread0(cpu)) { + /* Shared SPRs whacked back to normal */ + + /* XXX Update the SLW copies ! Also dbl check HIDs etc... */ init_shared_sprs(); + + /* If somebody was in fast_sleep, we may have a workaround + * to undo + */ + if (cpu->in_fast_sleep) { + prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep" + " undoing workarounds...\n", cpu->pir); + fast_sleep_exit(); + } + + /* And we might have lost TB sync */ + chiptod_wakeup_resync(); + + /* The TLB surely contains garbage */ + cleanup_tlb(); } + + /* Per-thread additional cleanup */ init_replicated_sprs(); - reset_cpu_icp(); + + // XXX Cleanup SLW, check HIDs ... 
} -#ifdef FAST_REBOOT_CLEARS_MEMORY -static void fast_mem_clear(uint64_t start, uint64_t end) +void __noreturn enter_nap(void); + +static void check_split_core(void) { - printf("MEMORY: Clearing %llx..%llx\n", start, end); + struct cpu_thread *cpu; + u64 mask, hid0; + + hid0 = mfspr(SPR_HID0); + mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE; - while(start < end) { - asm volatile("dcbz 0,%0" : : "r" (start) : "memory"); - start += 128; + if ((hid0 & mask) == 0) + return; + + prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir); + + /* If it's a secondary thread, just send it to nap */ + if (this_cpu()->pir & 7) { + /* Prepare to be woken up */ + icp_prep_for_pm(); + /* Setup LPCR to wakeup on external interrupts only */ + mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) | + SPR_LPCR_P8_PECE2)); + /* Go to nap (doesn't return) */ + enter_nap(); } -} -static void memory_reset(void) -{ - struct address_range *i; - uint64_t skistart = SKIBOOT_BASE; - uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE; - - printf("MEMORY: Clearing ...\n"); - - list_for_each(&address_ranges, i, list) { - uint64_t start = cleanup_addr(i->arange->start); - uint64_t end = cleanup_addr(i->arange->end); - - if (start >= skiend || end <= skistart) - fast_mem_clear(start, end); - else { - if (start < skistart) - fast_mem_clear(start, skistart); - if (end > skiend) - fast_mem_clear(skiend, end); - } + prlog(PR_INFO, "RESET: Primary, unsplitting... \n"); + + /* Trigger unsplit operation and update SLW image */ + hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS; + set_hid0(hid0); + opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0); + + /* Wait for unsplit */ + while (mfspr(SPR_HID0) & mask) + cpu_relax(); + + /* Now the guys are sleeping, wake'em up. They will come back + * via reset and continue the fast reboot process normally. + * No need to wait. + */ + prlog(PR_INFO, "RESET: Waking unsplit secondaries... 
\n"); + + for_each_cpu(cpu) { + if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu())) + continue; + icp_kick_cpu(cpu); } } -#endif /* FAST_REBOOT_CLEARS_MEMORY */ + /* Entry from asm after a fast reset */ -void __noreturn fast_reboot(void); +void __noreturn fast_reboot_entry(void); -void __noreturn fast_reboot(void) +void __noreturn fast_reboot_entry(void) { - static volatile bool fast_boot_release; struct cpu_thread *cpu; - printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir); + prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir); + time_wait_ms(100); - /* If this CPU was chosen as the resettor, it must reset the - * resettee (the one that initiated the whole process + lock(&reset_lock); + if (last_man_standing && next_cpu(first_cpu())) { + prlog(PR_DEBUG, "RESET: last man standing fixup...\n"); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET); + } + last_man_standing = NULL; + unlock(&reset_lock); + + /* We reset our ICP first ! Otherwise we might get stray interrupts + * when unsplitting + */ + reset_cpu_icp(); + + /* If we are split, we need to unsplit. Since that can send us + * to NAP, which will come back via reset, we do it now */ - if (this_cpu() == resettor) - do_reset_core_p7(resettee); + check_split_core(); /* Are we the original boot CPU ? If not, we spin waiting * for a relase signal from CPU 1, then we clean ourselves @@ -277,8 +455,10 @@ void __noreturn fast_reboot(void) __secondary_cpu_entry(); } + prlog(PR_INFO, "RESET: Boot CPU waiting for everybody...\n"); + /* We are the original boot CPU, wait for secondaries to - * be captured + * be captured. 
*/ for_each_cpu(cpu) { if (cpu == this_cpu()) @@ -292,7 +472,7 @@ void __noreturn fast_reboot(void) smt_medium(); } - printf("INIT: Releasing secondaries...\n"); + prlog(PR_INFO, "RESET: Releasing secondaries...\n"); /* Release everybody */ fast_boot_release = true; @@ -310,7 +490,14 @@ void __noreturn fast_reboot(void) } } - printf("INIT: All done, resetting everything else...\n"); + prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n"); + + for_each_cpu(cpu) { + if (cpu->primary == cpu) + clr_special_wakeup(cpu); + } + + prlog(PR_INFO, "RESET: All done, cleaning up...\n"); /* Clear release flag for next time */ fast_boot_release = false; @@ -322,6 +509,12 @@ void __noreturn fast_reboot(void) /* Set our state to active */ this_cpu()->state = cpu_state_active; + /* We can now do NAP mode */ + cpu_set_pm_enable(true); + + /* Start preloading kernel and ramdisk */ + start_preload_kernel(); + /* Poke the consoles (see comments in the code there) */ fsp_console_reset(); @@ -331,15 +524,6 @@ void __noreturn fast_reboot(void) /* Remove all PCI devices */ pci_reset(); - /* Reset IO Hubs */ - cec_reset(); - - /* Re-Initialize all discovered PCI slots */ - pci_init_slots(); - - /* Clear memory */ -#ifdef FAST_REBOOT_CLEARS_MEMORY - memory_reset(); -#endif + /* Load and boot payload */ load_and_boot_kernel(true); } diff --git a/core/init.c b/core/init.c index 12fd7a7..bc14da7 100644 --- a/core/init.c +++ b/core/init.c @@ -303,7 +303,7 @@ extern uint64_t boot_offset; static size_t initramfs_size; -static bool start_preload_kernel(void) +bool start_preload_kernel(void) { int loaded; @@ -434,6 +434,9 @@ static void load_initramfs(void) { int loaded; + dt_check_del_prop(dt_chosen, "linux,initrd-start"); + dt_check_del_prop(dt_chosen, "linux,initrd-end"); + loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS, RESOURCE_SUBID_NONE); @@ -499,6 +502,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot) occ_pstates_init(); /* Set kernel command line argument if specified */ 
+ dt_check_del_prop(dt_chosen, "bootargs"); cmdline = nvram_query("bootargs"); #ifdef KERNEL_COMMAND_LINE if (!cmdline) @@ -646,7 +650,7 @@ void setup_reset_vector(void) *(dst++) = *(src++); } -static void copy_exception_vectors(void) +void copy_exception_vectors(void) { /* Backup previous vectors as this could contain a kernel * image. diff --git a/core/lock.c b/core/lock.c index 53cc337..e82048b 100644 --- a/core/lock.c +++ b/core/lock.c @@ -110,6 +110,9 @@ void unlock(struct lock *l) this_cpu()->lock_depth--; l->lock_val = 0; + /* WARNING: On fast reboot, we can be reset right at that + * point, so the reset_lock in there cannot be in the con path + */ if (l->in_con_path) { cpu->con_suspend--; if (cpu->con_suspend == 0 && cpu->con_need_flush) @@ -1456,6 +1456,7 @@ static void __pci_reset(struct list_head *list) while ((pd = list_pop(list, struct pci_device, link)) != NULL) { __pci_reset(&pd->children); + dt_free(pd->dn); free(pd); } } @@ -1472,10 +1473,17 @@ void pci_reset(void) * state machine could be done in parallel) */ for (i = 0; i < ARRAY_SIZE(phbs); i++) { - if (!phbs[i]) + struct phb *phb = phbs[i]; + if (!phb) continue; - __pci_reset(&phbs[i]->devices); + __pci_reset(&phb->devices); + if (phb->ops->ioda_reset) + phb->ops->ioda_reset(phb, true); } + + /* Re-Initialize all discovered PCI slots */ + pci_init_slots(); + } static void pci_do_jobs(void (*fn)(void *)) diff --git a/core/platform.c b/core/platform.c index 7672914..b37346e 100644 --- a/core/platform.c +++ b/core/platform.c @@ -24,6 +24,7 @@ #include <xscom.h> #include <errorlog.h> #include <bt.h> +#include <nvram.h> bool manufacturing_mode = false; struct platform platform; @@ -54,10 +55,10 @@ static int64_t opal_cec_reboot(void) console_complete_flush(); -#ifdef ENABLE_FAST_RESET - /* Try a fast reset first */ - fast_reset(); -#endif + /* Try a fast reset first, if enabled */ + if (nvram_query_eq("experimental-fast-reset","feeling-lucky")) + fast_reboot(); + if (platform.cec_reboot) return 
platform.cec_reboot(); diff --git a/hw/fsp/fsp-console.c b/hw/fsp/fsp-console.c index 44d24cc..0080d73 100644 --- a/hw/fsp/fsp-console.c +++ b/hw/fsp/fsp-console.c @@ -892,6 +892,9 @@ static void reopen_all_hvsi(void) void fsp_console_reset(void) { + if (!fsp_present()) + return; + prlog(PR_NOTICE, "FSP: Console reset !\n"); /* This is called on a fast-reset. To work around issues with HVSI @@ -1001,6 +1004,8 @@ void fsp_console_select_stdout(void) */ } } + dt_check_del_prop(dt_chosen, "linux,stdout-path"); + if (fsp_serials[1].open && use_serial) { dt_add_property_string(dt_chosen, "linux,stdout-path", "/ibm,opal/consoles/serial@1"); diff --git a/hw/fsp/fsp-leds.c b/hw/fsp/fsp-leds.c index 50e82b5..b5a32ad 100644 --- a/hw/fsp/fsp-leds.c +++ b/hw/fsp/fsp-leds.c @@ -1570,6 +1570,9 @@ void create_led_device_nodes(void) if (!pled) return; + /* Check if already populated (fast-reboot) */ + if (dt_has_node_property(pled, "compatible", NULL)) + return; dt_add_property_strings(pled, "compatible", DT_PROPERTY_LED_COMPATIBLE); led_mode = dt_prop_get(pled, DT_PROPERTY_LED_MODE); @@ -517,10 +517,14 @@ void occ_pstates_init(void) struct proc_chip *chip; struct cpu_thread *c; s8 pstate_nom; + static bool occ_pstates_initialized; /* OCC is P8 only */ if (proc_gen != proc_gen_p8) return; + /* Handle fast reboots */ + if (occ_pstates_initialized) + return; chip = next_chip(NULL); if (!chip->homer_base) { @@ -558,6 +562,7 @@ void occ_pstates_init(void) for_each_chip(chip) chip->throttle = 0; opal_add_poller(occ_throttle_poll, NULL); + occ_pstates_initialized = true; } struct occ_load_req { @@ -378,6 +378,36 @@ static uint64_t psi_p7_irq_attributes(struct irq_source *is __unused, return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_FREQUENT; } +static const uint32_t psi_p8_irq_to_xivr[P8_IRQ_PSI_IRQ_COUNT] = { + [P8_IRQ_PSI_FSP] = PSIHB_XIVR_FSP, + [P8_IRQ_PSI_OCC] = PSIHB_XIVR_OCC, + [P8_IRQ_PSI_FSI] = PSIHB_XIVR_FSI, + [P8_IRQ_PSI_LPC] = PSIHB_XIVR_LPC, + [P8_IRQ_PSI_LOCAL_ERR] = 
PSIHB_XIVR_LOCAL_ERR, + [P8_IRQ_PSI_EXTERNAL]= PSIHB_XIVR_HOST_ERR, +}; + +static void psi_cleanup_irq(struct psi *psi) +{ + uint32_t irq; + uint64_t xivr, xivr_p; + + for (irq = 0; irq < P8_IRQ_PSI_IRQ_COUNT; irq++) { + prlog(PR_DEBUG, "PSI[0x%03x]: Cleaning up IRQ %d\n", + psi->chip_id, irq); + + xivr_p = psi_p8_irq_to_xivr[irq]; + xivr = in_be64(psi->regs + xivr_p); + xivr |= (0xffull << 32); + out_be64(psi->regs + xivr_p, xivr); + time_wait_ms_nopoll(10); + xivr = in_be64(psi->regs + xivr_p); + if (xivr & PPC_BIT(39)) { + printf(" Need EOI !\n"); + icp_send_eoi(psi->interrupt + irq); + } + } +} /* Called on a fast reset, make sure we aren't stuck with * an accepted and never EOId PSI interrupt @@ -385,27 +415,13 @@ static uint64_t psi_p7_irq_attributes(struct irq_source *is __unused, void psi_irq_reset(void) { struct psi *psi; - uint64_t xivr; printf("PSI: Hot reset!\n"); - assert(proc_gen == proc_gen_p7); + assert(proc_gen == proc_gen_p8); list_for_each(&psis, psi, list) { - /* Mask the interrupt & clean the XIVR */ - xivr = 0x000000ff00000000UL; - xivr |= P7_IRQ_BUID(psi->interrupt) << 16; - out_be64(psi->regs + PSIHB_XIVR, xivr); - -#if 0 /* Seems to checkstop ... */ - /* - * Maybe not anymore; we were just blindly sending - * this on all iopaths, not just the active one; - * We don't even know if those psis are even correct. 
- */ - /* Send a dummy EOI to make sure the ICP is clear */ - icp_send_eoi(psi->interrupt); -#endif + psi_cleanup_irq(psi); } } @@ -416,34 +432,17 @@ static const struct irq_source_ops psi_p7_irq_ops = { .attributes = psi_p7_irq_attributes, }; + static int64_t psi_p8_set_xive(struct irq_source *is, uint32_t isn, uint16_t server, uint8_t priority) { struct psi *psi = is->data; uint64_t xivr_p, xivr; + uint32_t irq_idx = isn & 7; - switch(isn & 7) { - case P8_IRQ_PSI_FSP: - xivr_p = PSIHB_XIVR_FSP; - break; - case P8_IRQ_PSI_OCC: - xivr_p = PSIHB_XIVR_OCC; - break; - case P8_IRQ_PSI_FSI: - xivr_p = PSIHB_XIVR_FSI; - break; - case P8_IRQ_PSI_LPC: - xivr_p = PSIHB_XIVR_LPC; - break; - case P8_IRQ_PSI_LOCAL_ERR: - xivr_p = PSIHB_XIVR_LOCAL_ERR; - break; - case P8_IRQ_PSI_EXTERNAL: - xivr_p = PSIHB_XIVR_HOST_ERR; - break; - default: - return OPAL_PARAMETER; - } + if (irq_idx >= P8_IRQ_PSI_IRQ_COUNT) + return OPAL_PARAMETER; + xivr_p = psi_p8_irq_to_xivr[irq_idx]; /* Populate the XIVR */ xivr = (uint64_t)server << 40; @@ -460,29 +459,12 @@ static int64_t psi_p8_get_xive(struct irq_source *is, uint32_t isn __unused, { struct psi *psi = is->data; uint64_t xivr_p, xivr; + uint32_t irq_idx = isn & 7; - switch(isn & 7) { - case P8_IRQ_PSI_FSP: - xivr_p = PSIHB_XIVR_FSP; - break; - case P8_IRQ_PSI_OCC: - xivr_p = PSIHB_XIVR_OCC; - break; - case P8_IRQ_PSI_FSI: - xivr_p = PSIHB_XIVR_FSI; - break; - case P8_IRQ_PSI_LPC: - xivr_p = PSIHB_XIVR_LPC; - break; - case P8_IRQ_PSI_LOCAL_ERR: - xivr_p = PSIHB_XIVR_LOCAL_ERR; - break; - case P8_IRQ_PSI_EXTERNAL: - xivr_p = PSIHB_XIVR_HOST_ERR; - break; - default: - return OPAL_PARAMETER; - } + if (irq_idx >= P8_IRQ_PSI_IRQ_COUNT) + return OPAL_PARAMETER; + + xivr_p = psi_p8_irq_to_xivr[irq_idx]; /* Read & decode the XIVR */ xivr = in_be64(psi->regs + xivr_p); @@ -1053,3 +1035,4 @@ void psi_init(void) psi_init_psihb(np); } + @@ -1060,6 +1060,8 @@ static void fast_sleep_enter(void) } primary_thread->save_l2_fir_action1 = tmp; + 
primary_thread->in_fast_sleep = true; + tmp = tmp & ~0x0200000000000000ULL; rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1), tmp); @@ -1082,7 +1084,7 @@ static void fast_sleep_enter(void) /* Workarounds while exiting fast-sleep */ -static void fast_sleep_exit(void) +void fast_sleep_exit(void) { uint32_t core = pir_to_core_id(this_cpu()->pir); uint32_t chip_id = this_cpu()->chip_id; @@ -1090,6 +1092,7 @@ static void fast_sleep_exit(void) int rc; primary_thread = this_cpu()->primary; + primary_thread->in_fast_sleep = false; rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1), primary_thread->save_l2_fir_action1); @@ -1131,7 +1134,7 @@ static int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t enter) opal_call(OPAL_CONFIG_CPU_IDLE_STATE, opal_config_cpu_idle_state, 2); #ifdef __HAVE_LIBPORE__ -static int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val) +int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val) { struct cpu_thread *c = find_cpu_by_pir(cpu_pir); diff --git a/include/config.h b/include/config.h index 2524570..f6572b0 100644 --- a/include/config.h +++ b/include/config.h @@ -72,9 +72,6 @@ */ //#define FORCE_DUMMY_CONSOLE 1 -/* Enable this to do fast resets. Currently unreliable... 
*/ -//#define ENABLE_FAST_RESET 1 - /* Enable this to make fast reboot clear memory */ //#define FAST_REBOOT_CLEARS_MEMORY 1 diff --git a/include/cpu.h b/include/cpu.h index 341e73d..f649a13 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -66,6 +66,7 @@ struct cpu_thread { bool in_mcount; bool in_poller; bool in_reinit; + bool in_fast_sleep; bool in_sleep; bool in_idle; uint32_t hbrt_spec_wakeup; /* primary only */ diff --git a/include/processor.h b/include/processor.h index 4b11702..3942268 100644 --- a/include/processor.h +++ b/include/processor.h @@ -164,6 +164,7 @@ /* Bits in HID0 */ #define SPR_HID0_POWER8_4LPARMODE PPC_BIT(2) #define SPR_HID0_POWER8_2LPARMODE PPC_BIT(6) +#define SPR_HID0_POWER8_DYNLPARDIS PPC_BIT(15) #define SPR_HID0_POWER8_HILE PPC_BIT(19) #define SPR_HID0_POWER9_HILE PPC_BIT(4) #define SPR_HID0_POWER8_ENABLE_ATTN PPC_BIT(31) diff --git a/include/skiboot.h b/include/skiboot.h index 2a9f5e2..2ef7677 100644 --- a/include/skiboot.h +++ b/include/skiboot.h @@ -190,12 +190,14 @@ extern unsigned long get_symbol(unsigned long addr, char **sym, char **sym_end); /* Fast reboot support */ -extern void fast_reset(void); +extern void fast_reboot(void); extern void __noreturn __secondary_cpu_entry(void); extern void __noreturn load_and_boot_kernel(bool is_reboot); extern void cleanup_tlb(void); extern void init_shared_sprs(void); extern void init_replicated_sprs(void); +extern bool start_preload_kernel(void); +extern void copy_exception_vectors(void); extern void setup_reset_vector(void); /* Various probe routines, to replace with an initcall system */ @@ -269,6 +271,11 @@ extern void slw_update_timer_expiry(uint64_t new_target); /* Is SLW timer available ? 
*/ extern bool slw_timer_ok(void); +/* Patch SPR in SLW image */ +extern int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); + +extern void fast_sleep_exit(void); + /* Fallback fake RTC */ extern void fake_rtc_init(void); diff --git a/platforms/mambo/mambo.c b/platforms/mambo/mambo.c index 64248ef..bd151ed 100644 --- a/platforms/mambo/mambo.c +++ b/platforms/mambo/mambo.c @@ -92,6 +92,8 @@ static inline int callthru3(int command, unsigned long arg1, unsigned long arg2, #define BOGUS_DISK_WRITE 117 #define BOGUS_DISK_INFO 118 +#define CALL_TCL 86 + static inline int callthru_disk_read(int id, void *buf, unsigned long sect, unsigned long nrsect) { @@ -112,6 +114,14 @@ static inline unsigned long callthru_disk_info(int op, int id) (unsigned long)id); } +extern unsigned long callthru_tcl(const char *str, int len); + +unsigned long callthru_tcl(const char *str, int len) +{ + prlog(PR_DEBUG, "Sending TCL to Mambo, cmd: %s\n", str); + return callthru2(CALL_TCL, (unsigned long)str, (unsigned long)len); +} + struct bogus_disk_info { unsigned long size; int id; |