aboutsummaryrefslogtreecommitdiff
path: root/hw/chiptod.c
diff options
context:
space:
mode:
author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2018-04-16 23:03:56 +0530
committer: Stewart Smith <stewart@linux.ibm.com> 2018-04-17 03:52:10 -0500
commit: 8ff9be76345a5e46bdf18853dab13f6becb95580 (patch)
tree: b9a31468b340f892f9fe27edb8f6604f3e2cb675 /hw/chiptod.c
parent: 67d738807da0bbd6fe73e30d25753b6de1299df8 (diff)
downloadskiboot-8ff9be76345a5e46bdf18853dab13f6becb95580.zip
skiboot-8ff9be76345a5e46bdf18853dab13f6becb95580.tar.gz
skiboot-8ff9be76345a5e46bdf18853dab13f6becb95580.tar.bz2
opal/hmi: Fix soft lockups during TOD errors
There are some TOD errors which do not affect the working of the TOD and TB; they stay in a valid state. Hence we don't need a rendezvous for TOD errors that do not affect the working of the TB. TOD errors that affect the TOD/TB will report a global error on TFMR[44] along with bit 51, and they will go through the rendezvous path as expected. But TOD errors that do not affect the TB register set only TFMR bit 51. TFMR bit 51 is cleared when any single thread clears the TOD error. Once cleared, the new state of bit 51 is reflected to all the cores on that chip. Any thread that reads the TFMR register after the error is cleared will see TFMR bit 51 reset. Hence the threads that see TFMR[51]=1 fall through to the rendezvous path, while the threads that see TFMR[51]=0 return without doing anything. This ends up causing soft lockups in the host kernel. This patch fixes the issue by not considering the TOD interrupt (TFMR[51]) as a core-global error, and hence avoiding the rendezvous path completely. Instead, threads that see TFMR[51]=1 will now take a different path that just does the TOD error recovery. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'hw/chiptod.c')
-rw-r--r-- hw/chiptod.c 14
1 files changed, 12 insertions, 2 deletions
diff --git a/hw/chiptod.c b/hw/chiptod.c
index f6ef9a4..33d5539 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -970,7 +970,7 @@ bool chiptod_wakeup_resync(void)
return false;
}
-static int chiptod_recover_tod_errors(void)
+static int __chiptod_recover_tod_errors(void)
{
uint64_t terr;
uint64_t treset = 0;
@@ -1026,6 +1026,16 @@ static int chiptod_recover_tod_errors(void)
return 1;
}
+int chiptod_recover_tod_errors(void)
+{
+ int rc;
+
+ lock(&chiptod_lock);
+ rc = __chiptod_recover_tod_errors();
+ unlock(&chiptod_lock);
+ return rc;
+}
+
static int32_t chiptod_get_active_master(void)
{
if (current_topology < 0)
@@ -1550,7 +1560,7 @@ int chiptod_recover_tb_errors(bool *out_resynced)
* Bit 33 of TOD error register indicates sync check error.
*/
if (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT)
- rc = chiptod_recover_tod_errors();
+ rc = __chiptod_recover_tod_errors();
/* Check if TB is running. If not then we need to get it running. */
if (!(tfmr & SPR_TFMR_TB_VALID)) {