aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/hmi.c28
-rw-r--r--hw/chiptod.c27
2 files changed, 54 insertions, 1 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 7093b73..07c0846 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -179,6 +179,14 @@
/* Number of iterations for the various timeouts */
#define TIMEOUT_LOOPS 20000000
+/* All other TFMR error bits (i.e. errors other than bits 26 and 45). */
+#define SPR_TFMR_OTHER_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR | SPR_TFMR_TFMR_CORRUPT | \
+ SPR_TFMR_CHIP_TOD_INTERRUPT)
+
static const struct core_xstop_bit_info {
uint8_t bit; /* CORE FIR bit number */
enum OpalHMI_CoreXstopReason reason;
@@ -654,7 +662,12 @@ static void wait_for_cleanup_complete(void)
*/
static void timer_facility_do_cleanup(uint64_t tfmr)
{
- if (tfmr & SPR_TFMR_TB_RESIDUE_ERR) {
+ /*
+ * Workaround for HW logic bug in Power9. Do not reset the
+ * TB register if TB is valid and running.
+ */
+ if ((tfmr & SPR_TFMR_TB_RESIDUE_ERR) && !(tfmr & SPR_TFMR_TB_VALID)) {
+
/* Reset the TB register to clear the dirty data. */
mtspr(SPR_TBWU, 0);
mtspr(SPR_TBWL, 0);
@@ -841,6 +854,19 @@ static void pre_recovery_cleanup_p9(void)
}
/*
+ * Due to a HW logic bug in P9, TFMR bits 26 and 45 are always set
+ * once a TB residue or HDEC error has occurred for the first time.
+ * Hence, for an HMI on subsequent TB errors, add an additional check
+ * as a workaround to identify the validity of the errors and decide
+ * whether pre-recovery is required or not. Exit pre-recovery if
+ * other TB errors are also present in TFMR.
+ */
+ if (tfmr & SPR_TFMR_OTHER_ERRORS) {
+ unlock(&hmi_lock);
+ return;
+ }
+
+ /*
* First thread on the core ?
* if yes, setup the hmi cleanup state to !DONE
*/
diff --git a/hw/chiptod.c b/hw/chiptod.c
index 1dd1b26..b9e4774 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -1478,6 +1478,7 @@ int chiptod_recover_tb_errors(void)
{
uint64_t tfmr;
int rc = -1;
+ int thread_id;
if (chiptod_primary < 0)
return 0;
@@ -1503,6 +1504,17 @@ int chiptod_recover_tb_errors(void)
tfmr = mfspr(SPR_TFMR);
/*
+ * Workaround for HW logic bug in Power9.
+ * Even after one thread clears the TB residue error, the change is
+ * not reflected to the other threads on the same core.
+ * Check if TB is already valid and, if so, skip the checking of TB errors.
+ */
+
+ if ((proc_gen == proc_gen_p9) && (tfmr & SPR_TFMR_TB_RESIDUE_ERR)
+ && (tfmr & SPR_TFMR_TB_VALID))
+ goto skip_tb_error_clear;
+
+ /*
* Check for TB errors.
* On Sync check error, bit 44 of TFMR is set. Check for it and
* clear it.
@@ -1525,6 +1537,7 @@ int chiptod_recover_tb_errors(void)
}
}
+skip_tb_error_clear:
/*
* Check for TOD sync check error.
* On TOD errors, bit 51 of TFMR is set. If this bit is on then we
@@ -1559,6 +1572,20 @@ int chiptod_recover_tb_errors(void)
}
/*
+ * Workaround for HW logic bug in Power9.
+ * In the ideal case (without the HW bug) only one thread from the core
+ * would have fallen through tfmr_recover_non_tb_errors() to clear
+ * the HDEC parity error on TFMR.
+ *
+ * Hence, to achieve the same behavior, allow only thread 0 to clear the
+ * HDEC parity error. For the rest of the threads, just reset the bit
+ * to prevent them from falling through tfmr_recover_non_tb_errors().
+ */
+ thread_id = cpu_get_thread_index(this_cpu());
+ if ((proc_gen == proc_gen_p9) && thread_id)
+ tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR;
+
+ /*
* Now that TB is running, check for TFMR non-TB errors.
*/
if ((tfmr & SPR_TFMR_HDEC_PARITY_ERROR) ||