aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorVaidyanathan Srinivasan <svaidy@linux.ibm.com>2021-06-25 11:49:16 +0530
committerVasant Hegde <hegdevasant@linux.vnet.ibm.com>2021-06-30 15:05:54 +0530
commit8545bb2ac8e0e17af29f149745e4798e86821d36 (patch)
tree57c59ef9269318f7c82d4e1988f12eb830834c0b /core
parent9d52c580d3fabbea6b276b98f925ab0fceebb96c (diff)
downloadskiboot-8545bb2ac8e0e17af29f149745e4798e86821d36.zip
skiboot-8545bb2ac8e0e17af29f149745e4798e86821d36.tar.gz
skiboot-8545bb2ac8e0e17af29f149745e4798e86821d36.tar.bz2
cpu: Add retry in cpu_pm_disable to kick cpus out of idle
cpu_pm_disable sets pm_enabled = false and expects all cpus to exit idle. This is needed so that they can re-enter idle with new settings. Right after cpu_bringup() we call copy_sreset_vector() and then cpu_set_sreset_enable(true). At this time some cpus are yet to enter idle and hence miss the doorbell meant to wake them up. This leads to cpu_pm_disable waiting forever. This pattern happens on some systems in fused-core mode. The fact that the pm_enabled flag is changing right in the middle of idle entry is seen from the "cpu_idle_p9 called with pm disabled" traces. One method to fix this race is to retry the doorbell after a timeout. This patch implements a small timeout (a few seconds) and then issues the doorbell once again to kick the cpu that entered idle late after missing the pm_enabled = false flag. This checking loop runs in smt_lowest() and hence the timeout number maps to a couple of seconds, which is sufficient to let the cpus settle in idle and make them see the doorbell and exit. Example boot log: [ 288.309322810,7] INIT: CPU PIR 0x000d called in [ 288.309320768,7] INIT: CPU PIR 0x000b called in [ 288.314603802,7] INIT: CPU PIR 0x0020 called in [ 288.321303468,5] CPU: All 88 processors called in... 
[ 288.315056796,6] cpu_idle_p9 called on cpu 0x024e with pm disabled [ 288.321308091,6] cpu_idle_p9 called on cpu 0x0264 with pm disabled [ 288.314424259,6] cpu_idle_p9 called on cpu 0x025b with pm disabled [ 288.324928307,6] cpu_idle_p9 called on cpu 0x0065 with pm disabled [ 305.207316004,6] cpu_pm_disable TIMEOUT on cpu 0x0261 to exit idle [ 322.093298501,6] cpu_pm_disable TIMEOUT on cpu 0x0263 to exit idle [ 338.491281028,6] cpu_pm_disable TIMEOUT on cpu 0x0265 to exit idle [ 355.377263492,6] cpu_pm_disable TIMEOUT on cpu 0x0267 to exit idle [ 372.263245960,6] cpu_pm_disable TIMEOUT on cpu 0x0269 to exit idle [ 389.149228389,6] cpu_pm_disable TIMEOUT on cpu 0x026b to exit idle [ 406.035210852,6] cpu_pm_disable TIMEOUT on cpu 0x026d to exit idle [ 422.433193381,6] cpu_pm_disable TIMEOUT on cpu 0x026f to exit idle [ 422.433277720,6] CHIPTOD: Calculated MCBS is 0x25 (Cfreq=2000000000 Tfreq=32000000) Reported-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> [Reworded commit message - Vasant] Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Diffstat (limited to 'core')
-rw-r--r--core/cpu.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/core/cpu.c b/core/cpu.c
index d30bef8..f2b5bbc 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -437,7 +437,7 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
unsigned int vec = 0;
if (!pm_enabled) {
- prlog_once(PR_DEBUG, "cpu_idle_p9 called pm disabled\n");
+ prlog(PR_DEBUG, "cpu_idle_p9 called on cpu 0x%04x with pm disabled\n", cpu->pir);
return vec;
}
@@ -593,6 +593,7 @@ no_pm:
static void cpu_pm_disable(void)
{
struct cpu_thread *cpu;
+ unsigned int timeout;
pm_enabled = false;
sync();
@@ -610,10 +611,18 @@ static void cpu_pm_disable(void)
p9_dbell_send(cpu->pir);
}
+ /* This code is racy with cpus entering idle, late ones miss the dbell */
+
smt_lowest();
for_each_available_cpu(cpu) {
- while (cpu->in_sleep || cpu->in_idle)
+ timeout = 0x08000000;
+ while ((cpu->in_sleep || cpu->in_idle) && --timeout)
barrier();
+ if (!timeout) {
+ prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n",
+ cpu->pir);
+ p9_dbell_send(cpu->pir);
+ }
}
smt_medium();
}