diff options
Diffstat (limited to 'core/fast-reboot.c')
-rw-r--r-- | core/fast-reboot.c | 538 |
1 file changed, 361 insertions(+), 177 deletions(-)
diff --git a/core/fast-reboot.c b/core/fast-reboot.c index 30b77e9..cf8b3d4 100644 --- a/core/fast-reboot.c +++ b/core/fast-reboot.c @@ -25,242 +25,420 @@ #include <timebase.h> #include <pci.h> #include <chip.h> +#include <chiptod.h> + +#define P8_EX_TCTL_DIRECT_CONTROLS(t) (0x10013000 + (t) * 0x10) +#define P8_DIRECT_CTL_STOP PPC_BIT(63) +#define P8_DIRECT_CTL_PRENAP PPC_BIT(47) +#define P8_DIRECT_CTL_SRESET PPC_BIT(60) -/* - * To get control of all threads, we sreset them via XSCOM after - * patching the 0x100 vector. This will work as long as the target - * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider - * a more messy approach. - * - * The SCOM register we want is called "Core RAS Control" in the doc - * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list - * - * Bits in there change from CPU rev to CPU rev but the bit we care - * about, bit 60 "sreset_request" appears to have stuck to the same - * place in both P7 and P7+. The register also has the same SCOM - * address - */ -#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400 -#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440 -#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480 -#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0 -#define TCTL_DC_SRESET_REQUEST PPC_BIT(60) /* Flag tested by the OPAL entry code */ uint8_t reboot_in_progress; -static struct cpu_thread *resettor, *resettee; +static volatile bool fast_boot_release; +static struct cpu_thread *last_man_standing; +static struct lock reset_lock = LOCK_UNLOCKED; -static void flush_caches(void) +static int set_special_wakeup(struct cpu_thread *cpu) { - uint64_t base = SKIBOOT_BASE; - uint64_t end = base + SKIBOOT_SIZE; + uint64_t val, poll_target, stamp; + uint32_t core_id; + int rc; + + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ - /* Not sure what the effect of sreset is on cores, so let's - * shoot a series of dcbf's on all cachelines that make up - * our core 
memory just in case... + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); + + prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; + + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary */ - while(base < end) { - asm volatile("dcbf 0,%0" : : "r" (base) : "memory"); - base += 128; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), + PPC_BIT(0)); + if (rc) { + prerror("RESET: XSCOM error %d asserting special" + " wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - sync(); + + /* + * HWP uses the history for Perf register here, dunno why it uses + * that one instead of the pHyp one, maybe to avoid clobbering it... + * + * In any case, it does that to check for run/nap vs.sleep/winkle/other + * to decide whether to poll on checkstop or not. Since we don't deal + * with checkstop conditions here, we ignore that part. + */ + + /* + * Now poll for completion of special wakeup. The HWP is nasty here, + * it will poll at 5ms intervals for up to 200ms. This is not quite + * acceptable for us at runtime, at least not until we have the + * ability to "context switch" HBRT. In practice, because we don't + * winkle, it will never take that long, so we increase the polling + * frequency to 1us per poll. However we do have to keep the same + * timeout. + * + * We don't use time_wait_ms() either for now as we don't want to + * poll the FSP here. 
+ */ + stamp = mftb(); + poll_target = stamp + msecs_to_tb(200); + val = 0; + while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) { + /* Wait 1 us */ + time_wait_us(1); + + /* Read PM state */ + rc = xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0), + &val); + if (rc) { + prerror("RESET: XSCOM error %d reading PM state on" + " 0x%x\n", rc, cpu->pir); + return rc; + } + /* Check timeout */ + if (mftb() > poll_target) + break; + } + + /* Success ? */ + if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) { + uint64_t now = mftb(); + prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n", + tb_to_usecs(now - stamp)); + return 0; + } + + /* + * We timed out ... + * + * HWP has a complex workaround for HW255321 which affects + * Murano DD1 and Venice DD1. Ignore that for now + * + * Instead we just dump some XSCOMs for error logging + */ + prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir); + prerror("RESET: PM0 = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + prerror("RESET: SPC_WKUP = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_IDLE_STATE_HISTORY_PHYP), + &val); + prerror("RESET: HISTORY = 0x%016llx\n", val); + + return OPAL_HARDWARE; } -static bool do_reset_core_p7(struct cpu_thread *cpu) +static int clr_special_wakeup(struct cpu_thread *cpu) { - uint32_t xscom_addr, chip; - uint64_t ctl; + uint64_t val; + uint32_t core_id; int rc; - /* Add the Core# */ - xscom_addr = EX0_TCTL_DIRECT_CONTROLS0; - xscom_addr |= ((cpu->pir >> 2) & 7) << 24; + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ + + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); - chip = pir_to_chip_id(cpu->pir); + prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id); + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return OPAL_SUCCESS; - ctl = 
TCTL_DC_SRESET_REQUEST; - rc = xscom_write(chip, xscom_addr, ctl); - rc |= xscom_write(chip, xscom_addr + 0x40, ctl); - rc |= xscom_write(chip, xscom_addr + 0x80, ctl); - rc |= xscom_write(chip, xscom_addr + 0xc0, ctl); + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), 0); if (rc) { - prerror("RESET: Error %d resetting CPU 0x%04x\n", - rc, cpu->pir); - return false; + prerror("RESET: XSCOM error %d deasserting" + " special wakeup on 0x%x\n", rc, cpu->pir); + return rc; } - return true; + + /* + * The original HWp reads the XSCOM again with the comment + * "This puts an inherent delay in the propagation of the reset + * transition" + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + return 0; } -static void fast_reset_p7(void) +extern unsigned long callthru_tcl(const char *str, int len); + +static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits) +{ + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t xscom_addr; + char tcl_cmd[50]; + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + if (bits != P8_DIRECT_CTL_SRESET) + return; + snprintf(tcl_cmd, sizeof(tcl_cmd), "mysim cpu %i:%i set spr pc 0x100", core_id, thread_id); + callthru_tcl(tcl_cmd, strlen(tcl_cmd)); + return; + } + + xscom_addr = XSCOM_ADDR_P8_EX(core_id, + P8_EX_TCTL_DIRECT_CONTROLS(thread_id)); + + xscom_write(chip_id, xscom_addr, bits); +} + +static bool fast_reset_p8(void) { struct cpu_thread *cpu; - resettee = this_cpu(); - resettor = NULL; + /* Mark ourselves as last man standing in need of a reset */ + 
last_man_standing = this_cpu(); - /* Pick up a candidate resettor. We do that before we flush - * the caches - */ + prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n", + this_cpu()->pir, pir_to_core_id(this_cpu()->pir)); + + /* Assert special wakup on all cores */ for_each_cpu(cpu) { - /* - * Some threads might still be in skiboot. - * - * But because we deal with entire cores and we don't want - * to special case things, we are just going to reset them - * too making the assumption that this is safe, they are - * holding no locks. This can only be true if they don't - * have jobs scheduled which is hopefully the case. - */ - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; + if (cpu->primary == cpu) + if (set_special_wakeup(cpu) != OPAL_SUCCESS) + return false; + } - /* - * Only hit cores and only if they aren't on the same core - * as ourselves - */ - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; + prlog(PR_DEBUG, "RESET: Stopping the world...\n"); - /* Pick up one of those guys as our "resettor". It will be - * in charge of resetting this CPU. We avoid resetting - * ourselves, not sure how well it would do with SCOM - */ - resettor = cpu; - break; - } + /* Put everybody in stop except myself */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_STOP); - if (!resettor) { - printf("RESET: Can't find a resettor !\n"); - return; + /* Also make sure that saved_r1 is 0 ! That's what will + * make our reset vector jump to fast_reboot_entry + */ + cpu->save_r1 = 0; } - printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n", - this_cpu()->pir, resettor->pir); - printf("RESET: Flushing caches...\n"); + /* Restore skiboot vectors */ + copy_exception_vectors(); + setup_reset_vector(); - /* Is that necessary ? 
*/ - flush_caches(); + prlog(PR_DEBUG, "RESET: Pre-napping all threads but one...\n"); - /* Reset everybody except self and except resettor */ + /* Put everybody in pre-nap except myself */ for_each_cpu(cpu) { - if (cpu->state != cpu_state_os && - cpu->state != cpu_state_active) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || - cpu->pir & 0x3) - continue; - if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor)) - continue; + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP); + } - printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir); + prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n"); - if (!do_reset_core_p7(cpu)) - return; + /* Reset everybody except my own core threads */ + for_each_cpu(cpu) { + if (cpu != this_cpu()) + set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET); } - /* Reset the resettor last because it's going to kill me ! */ - printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir); - if (!do_reset_core_p7(resettor)) - return; - - /* Don't return */ - for (;;) - ; + return true; } -void fast_reset(void) +void fast_reboot(void) { - uint32_t pvr = mfspr(SPR_PVR); - extern uint32_t fast_reset_patch_start; - extern uint32_t fast_reset_patch_end; - uint32_t *dst, *src; + bool success; + + if (proc_gen != proc_gen_p8) { + prlog(PR_DEBUG, + "RESET: Fast reboot not available on this CPU\n"); + return; + } + if (chip_quirk(QUIRK_NO_DIRECT_CTL)) { + prlog(PR_DEBUG, + "RESET: Fast reboot disabled by quirk\n"); + return; + } - printf("RESET: Fast reboot request !\n"); + prlog(PR_INFO, "RESET: Initiating fast reboot...\n"); /* XXX We need a way to ensure that no other CPU is in skiboot * holding locks (via the OPAL APIs) and if they are, we need - * for them to get out + * for them to get out. Hopefully that isn't happening, but... + * + * To fix this properly, we want to keep track of OPAL entry/exit + * on all CPUs. 
*/ reboot_in_progress = 1; time_wait_ms(200); - /* Copy reset trampoline */ - printf("RESET: Copying reset trampoline...\n"); - src = &fast_reset_patch_start; - dst = (uint32_t *)0x100; - while(src < &fast_reset_patch_end) - *(dst++) = *(src++); - sync_icache(); - - switch(PVR_TYPE(pvr)) { - case PVR_TYPE_P7: - case PVR_TYPE_P7P: - fast_reset_p7(); + /* Lock so the new guys coming don't reset us */ + lock(&reset_lock); + + fast_boot_release = false; + + success = fast_reset_p8(); + + /* Unlock, at this point we go away */ + unlock(&reset_lock); + + if (success) { + if (!next_cpu(first_cpu())) + /* Only 1 CPU, so fake reset ourselves */ + asm volatile("ba 0x100 " : : : ); + /* Don't return */ + for (;;) + ; } } static void cleanup_cpu_state(void) { - if (cpu_is_thread0(this_cpu())) { - cleanup_tlb(); + struct cpu_thread *cpu = this_cpu(); + + cpu->current_hile = false; + + /* Per core cleanup */ + if (cpu_is_thread0(cpu)) { + /* Shared SPRs whacked back to normal */ + + /* XXX Update the SLW copies ! Also dbl check HIDs etc... */ init_shared_sprs(); + + /* If somebody was in fast_sleep, we may have a workaround + * to undo + */ + if (cpu->in_fast_sleep) { + prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep" + " undoing workarounds...\n", cpu->pir); + fast_sleep_exit(); + } + + /* And we might have lost TB sync */ + chiptod_wakeup_resync(); + + /* The TLB surely contains garbage */ + cleanup_tlb(); } + + /* Per-thread additional cleanup */ init_replicated_sprs(); - reset_cpu_icp(); + + // XXX Cleanup SLW, check HIDs ... 
} -#ifdef FAST_REBOOT_CLEARS_MEMORY -static void fast_mem_clear(uint64_t start, uint64_t end) +void __noreturn enter_nap(void); + +static void check_split_core(void) { - printf("MEMORY: Clearing %llx..%llx\n", start, end); + struct cpu_thread *cpu; + u64 mask, hid0; + + hid0 = mfspr(SPR_HID0); + mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE; - while(start < end) { - asm volatile("dcbz 0,%0" : : "r" (start) : "memory"); - start += 128; + if ((hid0 & mask) == 0) + return; + + prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir); + + /* If it's a secondary thread, just send it to nap */ + if (this_cpu()->pir & 7) { + /* Prepare to be woken up */ + icp_prep_for_pm(); + /* Setup LPCR to wakeup on external interrupts only */ + mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) | + SPR_LPCR_P8_PECE2)); + /* Go to nap (doesn't return) */ + enter_nap(); } -} -static void memory_reset(void) -{ - struct address_range *i; - uint64_t skistart = SKIBOOT_BASE; - uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE; - - printf("MEMORY: Clearing ...\n"); - - list_for_each(&address_ranges, i, list) { - uint64_t start = cleanup_addr(i->arange->start); - uint64_t end = cleanup_addr(i->arange->end); - - if (start >= skiend || end <= skistart) - fast_mem_clear(start, end); - else { - if (start < skistart) - fast_mem_clear(start, skistart); - if (end > skiend) - fast_mem_clear(skiend, end); - } + prlog(PR_INFO, "RESET: Primary, unsplitting... \n"); + + /* Trigger unsplit operation and update SLW image */ + hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS; + set_hid0(hid0); + opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0); + + /* Wait for unsplit */ + while (mfspr(SPR_HID0) & mask) + cpu_relax(); + + /* Now the guys are sleeping, wake'em up. They will come back + * via reset and continue the fast reboot process normally. + * No need to wait. + */ + prlog(PR_INFO, "RESET: Waking unsplit secondaries... 
\n"); + + for_each_cpu(cpu) { + if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu())) + continue; + icp_kick_cpu(cpu); } } -#endif /* FAST_REBOOT_CLEARS_MEMORY */ + /* Entry from asm after a fast reset */ -void __noreturn fast_reboot(void); +void __noreturn fast_reboot_entry(void); -void __noreturn fast_reboot(void) +void __noreturn fast_reboot_entry(void) { - static volatile bool fast_boot_release; struct cpu_thread *cpu; - printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir); + prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir); + time_wait_ms(100); - /* If this CPU was chosen as the resettor, it must reset the - * resettee (the one that initiated the whole process + lock(&reset_lock); + if (last_man_standing && next_cpu(first_cpu())) { + prlog(PR_DEBUG, "RESET: last man standing fixup...\n"); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP); + set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET); + } + last_man_standing = NULL; + unlock(&reset_lock); + + /* We reset our ICP first ! Otherwise we might get stray interrupts + * when unsplitting + */ + reset_cpu_icp(); + + /* If we are split, we need to unsplit. Since that can send us + * to NAP, which will come back via reset, we do it now */ - if (this_cpu() == resettor) - do_reset_core_p7(resettee); + check_split_core(); /* Are we the original boot CPU ? If not, we spin waiting * for a relase signal from CPU 1, then we clean ourselves @@ -277,8 +455,10 @@ void __noreturn fast_reboot(void) __secondary_cpu_entry(); } + prlog(PR_INFO, "RESET: Boot CPU waiting for everybody...\n"); + /* We are the original boot CPU, wait for secondaries to - * be captured + * be captured. 
*/ for_each_cpu(cpu) { if (cpu == this_cpu()) @@ -292,7 +472,7 @@ void __noreturn fast_reboot(void) smt_medium(); } - printf("INIT: Releasing secondaries...\n"); + prlog(PR_INFO, "RESET: Releasing secondaries...\n"); /* Release everybody */ fast_boot_release = true; @@ -310,7 +490,14 @@ void __noreturn fast_reboot(void) } } - printf("INIT: All done, resetting everything else...\n"); + prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n"); + + for_each_cpu(cpu) { + if (cpu->primary == cpu) + clr_special_wakeup(cpu); + } + + prlog(PR_INFO, "RESET: All done, cleaning up...\n"); /* Clear release flag for next time */ fast_boot_release = false; @@ -322,6 +509,12 @@ void __noreturn fast_reboot(void) /* Set our state to active */ this_cpu()->state = cpu_state_active; + /* We can now do NAP mode */ + cpu_set_pm_enable(true); + + /* Start preloading kernel and ramdisk */ + start_preload_kernel(); + /* Poke the consoles (see comments in the code there) */ fsp_console_reset(); @@ -331,15 +524,6 @@ void __noreturn fast_reboot(void) /* Remove all PCI devices */ pci_reset(); - /* Reset IO Hubs */ - cec_reset(); - - /* Re-Initialize all discovered PCI slots */ - pci_init_slots(); - - /* Clear memory */ -#ifdef FAST_REBOOT_CLEARS_MEMORY - memory_reset(); -#endif + /* Load and boot payload */ load_and_boot_kernel(true); } |