aboutsummaryrefslogtreecommitdiff
path: root/core/hmi.c
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2017-10-23 11:46:00 +0530
committerStewart Smith <stewart@linux.vnet.ibm.com>2017-10-23 12:37:52 -0500
commit00f2540c3c69c922771a73fda2ef83f49aaee0b6 (patch)
treeed319fa1a6bc8ff3c0aa8139bbf1440a782c586a /core/hmi.c
parentd1bb483e84c8819a0e2a7c89f1daa52432446e14 (diff)
downloadskiboot-00f2540c3c69c922771a73fda2ef83f49aaee0b6.zip
skiboot-00f2540c3c69c922771a73fda2ef83f49aaee0b6.tar.gz
skiboot-00f2540c3c69c922771a73fda2ef83f49aaee0b6.tar.bz2
opal/hmi: Workaround Power9 hw logic bug for couple of TFMR TB errors.
Add a workaround for a HW logic bug in Power9 where TB residue and HDEC parity errors cleared by one thread aren't visible to the other threads of the same core. The TB residue and HDEC parity errors are reported through TFMR bits 45 and 26 respectively. If any thread of the core clears TFMR bits 26 and 45, only thread 0 is able to see that the errors are cleared; the remaining threads 1, 2 and 3 do not see them as cleared. This causes TB error recovery to fail for TB residue and HDEC parity errors. TFMR is a per-core register and any changes made by one thread should be visible to the other threads of the same core. On a TB residue error (TFMR bit 45), the TB goes into an invalid state. Hence avoid handling/clearing the TB residue error if the TB is valid and running. Use TFMR bit 41 to check the validity of the TB state. For the HDEC parity error (TFMR bit 26), check for other errors in the TFMR register and ignore the pre-recovery for the HDEC parity error. If TFMR has any other TB error bits set along with the HDEC parity error, we can safely ignore handling of the HDEC parity error. Also, while clearing the HDEC parity error bit from TFMR, allow only thread 0 to clear it. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'core/hmi.c')
-rw-r--r--core/hmi.c28
1 files changed, 27 insertions, 1 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 7093b73..07c0846 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -179,6 +179,14 @@
/* Number of iterations for the various timeouts */
#define TIMEOUT_LOOPS 20000000
+/* TFMR error bits other than HDEC parity (bit 26) and TB residue (bit 45). */
+#define SPR_TFMR_OTHER_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR | SPR_TFMR_TFMR_CORRUPT | \
+ SPR_TFMR_CHIP_TOD_INTERRUPT)
+
static const struct core_xstop_bit_info {
uint8_t bit; /* CORE FIR bit number */
enum OpalHMI_CoreXstopReason reason;
@@ -654,7 +662,12 @@ static void wait_for_cleanup_complete(void)
*/
static void timer_facility_do_cleanup(uint64_t tfmr)
{
- if (tfmr & SPR_TFMR_TB_RESIDUE_ERR) {
+ /*
+ * Workaround for HW logic bug in Power9. Do not reset the
+ * TB register if TB is valid and running.
+ */
+ if ((tfmr & SPR_TFMR_TB_RESIDUE_ERR) && !(tfmr & SPR_TFMR_TB_VALID)) {
+
/* Reset the TB register to clear the dirty data. */
mtspr(SPR_TBWU, 0);
mtspr(SPR_TBWL, 0);
@@ -841,6 +854,19 @@ static void pre_recovery_cleanup_p9(void)
}
/*
+ * Due to a HW logic bug in p9, TFMR bits 26 and 45 are always set
+ * once a TB residue or HDEC error occurs for the first time. Hence for HMI
+ * on subsequent TB errors add additional check as workaround to
+ * identify validity of the errors and decide whether pre-recovery
+ * is required or not. Exit pre-recovery if there are other TB
+ * errors also present on TFMR.
+ */
+ if (tfmr & SPR_TFMR_OTHER_ERRORS) {
+ unlock(&hmi_lock);
+ return;
+ }
+
+ /*
* First thread on the core ?
* if yes, setup the hmi cleanup state to !DONE
*/