aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nicholas Piggin <npiggin@gmail.com> 2017-11-29 15:37:02 +1000
committer: Stewart Smith <stewart@linux.vnet.ibm.com> 2017-12-03 22:10:55 -0600
commit: 688c0d51e6e9773e7ad47eb4f6ce71ea032c671d (patch)
tree: 4119506ce784644cb83427b78199276b9d630600
parent: 0d84ea6bda03a00a1765dd7240a9e5231a537e96 (diff)
downloadskiboot-688c0d51e6e9773e7ad47eb4f6ce71ea032c671d.zip
skiboot-688c0d51e6e9773e7ad47eb4f6ce71ea032c671d.tar.gz
skiboot-688c0d51e6e9773e7ad47eb4f6ce71ea032c671d.tar.bz2
fast-reboot: quiesce opal before initiating a fast reboot
Switch fast reboot to use quiescing rather than "wait for a while".

If firmware cannot be quiesced, then fast reboot is skipped. This significantly improves the robustness of fast reboot in the face of bugs or unexpected latencies.

Complexity of synchronization in fast-reboot is reduced, because we are guaranteed to be single-threaded when quiesce succeeds, so locks can be removed.

When firmware can be quiesced, this will generally reduce fast reboot times by nearly 200ms, because quiescing usually takes very little time.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
-rw-r--r-- asm/head.S 10
-rw-r--r-- core/fast-reboot.c 47
-rw-r--r-- core/platform.c 6
3 files changed, 24 insertions, 39 deletions
diff --git a/asm/head.S b/asm/head.S
index 68a4e7b..eccf070 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -960,12 +960,6 @@ opal_entry:
addis %r2,%r2,(__toc_start - __head)@ha
addi %r2,%r2,(__toc_start - __head)@l
- /* Check for a reboot in progress */
- LOAD_ADDR_FROM_TOC(%r12, reboot_in_progress)
- lbz %r12,0(%r12)
- cmpwi %r12,0
- bne 2f
-
/* Check entry */
mr %r3,%r1
bl opal_entry_check
@@ -1003,10 +997,6 @@ opal_entry:
ld %r1,STACK_GPR1(%r1)
blr
-2: /* Reboot in progress, reject all calls */
- li %r3,OPAL_BUSY
- b 1b
-
.global start_kernel
start_kernel:
sync
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 4b7e9aa..949a106 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -30,9 +30,7 @@
#include <direct-controls.h>
/* Flag tested by the OPAL entry code */
-uint8_t reboot_in_progress;
static volatile bool fast_boot_release;
-static struct lock reset_lock = LOCK_UNLOCKED;
static bool cpu_state_wait_all_others(enum cpu_thread_state state,
unsigned long timeout_tb)
@@ -67,13 +65,10 @@ extern void *fdt;
extern struct lock capi_lock;
static const char *fast_reboot_disabled = NULL;
-static struct lock fast_reboot_disabled_lock = LOCK_UNLOCKED;
void disable_fast_reboot(const char *reason)
{
- lock(&fast_reboot_disabled_lock);
fast_reboot_disabled = reason;
- unlock(&fast_reboot_disabled_lock);
}
void fast_reboot(void)
@@ -93,45 +88,37 @@ void fast_reboot(void)
return;
}
- lock(&fast_reboot_disabled_lock);
+ /*
+ * Ensure all other CPUs have left OPAL calls.
+ */
+ if (!opal_quiesce(QUIESCE_HOLD, -1)) {
+ prlog(PR_DEBUG, "RESET: Fast reboot disabled because OPAL quiesce timed out\n");
+ return;
+ }
+
if (fast_reboot_disabled) {
prlog(PR_DEBUG, "RESET: Fast reboot disabled because %s\n",
fast_reboot_disabled);
- unlock(&fast_reboot_disabled_lock);
+ opal_quiesce(QUIESCE_RESUME, -1);
return;
}
- unlock(&fast_reboot_disabled_lock);
prlog(PR_NOTICE, "RESET: Initiating fast reboot %d...\n", ++fast_reboot_count);
free(fdt);
- /* XXX We need a way to ensure that no other CPU is in skiboot
- * holding locks (via the OPAL APIs) and if they are, we need
- * for them to get out. Hopefully that isn't happening, but...
- *
- * To fix this properly, we want to keep track of OPAL entry/exit
- * on all CPUs.
- */
- reboot_in_progress = 1;
- time_wait_ms(200);
-
- /* Lock so the new guys coming don't reset us */
- lock(&reset_lock);
-
fast_boot_release = false;
sync();
/* Put everybody in stop except myself */
- if (sreset_all_prepare())
+ if (sreset_all_prepare()) {
+ opal_quiesce(QUIESCE_RESUME, -1);
return;
-
- /* Now everyone else is stopped */
- unlock(&reset_lock);
+ }
/*
- * There is no point clearing special wakeup due to failure after this
- * point, because we will be going to full IPL. Less cleanup work means
- * less opportunity to fail.
+ * There is no point clearing special wakeup or un-quiescing on
+ * failure after this point, because we will be going to full IPL.
+ * Less cleanup work means less opportunity to fail.
*/
for_each_ungarded_cpu(cpu) {
@@ -156,6 +143,9 @@ void fast_reboot(void)
prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n");
sreset_all_finish();
+ /* This resets our quiesce state ready to enter the new kernel. */
+ opal_quiesce(QUIESCE_RESUME_FAST_REBOOT, -1);
+
asm volatile("ba 0x100\n\t" : : : "memory");
for (;;)
;
@@ -304,7 +294,6 @@ void __noreturn fast_reboot_entry(void)
/* Clear release flag for next time */
fast_boot_release = false;
- reboot_in_progress = 0;
/* Cleanup ourselves */
cleanup_cpu_state();
diff --git a/core/platform.c b/core/platform.c
index 732f67e..6816fe5 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -41,6 +41,8 @@ static int64_t opal_cec_power_down(uint64_t request)
{
prlog(PR_NOTICE, "OPAL: Shutdown request type 0x%llx...\n", request);
+ opal_quiesce(QUIESCE_HOLD, -1);
+
console_complete_flush();
if (platform.cec_power_down)
@@ -54,6 +56,8 @@ static int64_t opal_cec_reboot(void)
{
prlog(PR_NOTICE, "OPAL: Reboot request...\n");
+ opal_quiesce(QUIESCE_HOLD, -1);
+
console_complete_flush();
/* Try fast-reset unless explicitly disabled */
@@ -71,6 +75,8 @@ static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag)
{
struct errorlog *buf;
+ opal_quiesce(QUIESCE_HOLD, -1);
+
switch (reboot_type) {
case OPAL_REBOOT_NORMAL:
return opal_cec_reboot();