aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/hmi.c62
1 files changed, 59 insertions, 3 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 17983a3..846d2b9 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -163,6 +163,9 @@
#define P8_CORE_FIR 0x10013100
#define P9_CORE_FIR 0x20010A40
+/* And core WOF (Whose On First) */
+#define P9_CORE_WOF 0x20010A48
+
/* xscom addresses for pMisc Receive Malfunction Alert Register */
#define P8_MALFUNC_ALERT 0x02020011
#define P9_MALFUNC_ALERT 0x00090022
@@ -215,6 +218,28 @@ static const struct core_xstop_bit_info {
{ 63, CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ },
};
+static const struct core_recoverable_bit_info {
+ uint8_t bit; /* CORE FIR bit number */
+ const char *reason;
+} recoverable_bits[] = {
+ { 0, "IFU - SRAM (ICACHE parity, etc)" },
+ { 2, "IFU - RegFile" },
+ { 4, "IFU - Logic" },
+ { 9, "ISU - RegFile" },
+ { 11, "ISU - Logic" },
+ { 13, "ISU - Recoverable due to not in MT window" },
+ { 24, "VSU - Logic" },
+ { 27, "VSU - DFU logic" },
+ { 29, "LSU - SRAM (DCACHE parity, etc)" },
+ { 31, "LSU - RegFile" },
+ /* The following 3 bits may be set by SRAM errors. */
+ { 33, "LSU - TLB multi hit" },
+ { 34, "LSU - SLB multi hit" },
+ { 35, "LSU - ERAT multi hit" },
+ { 37, "LSU - Logic" },
+ { 39, "LSU - Recoverable due to not in MT window" },
+};
+
static const struct nx_xstop_bit_info {
uint8_t bit; /* NX FIR bit number */
enum OpalHMI_NestAccelXstopReason reason;
@@ -313,6 +338,21 @@ static int read_core_fir(uint32_t chip_id, uint32_t core_id, uint64_t *core_fir)
return rc;
}
+static int read_core_wof(uint32_t chip_id, uint32_t core_id, uint64_t *core_wof)
+{
+ int rc;
+
+ switch (proc_gen) {
+ case proc_gen_p9:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P9_EC(core_id, P9_CORE_WOF), core_wof);
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
static bool decode_core_fir(struct cpu_thread *cpu,
struct OpalHMIEvent *hmi_evt)
{
@@ -1069,6 +1109,7 @@ static void hmi_print_debug(const uint8_t *msg)
int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
{
+ struct cpu_thread *cpu = this_cpu();
int recover = 1;
uint64_t tfmr;
@@ -1084,18 +1125,33 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
* looking at TFMR register. TFMR will tell us correct state of
* TB register.
*/
- this_cpu()->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
+ cpu->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
prlog(PR_DEBUG, "Received HMI interrupt: HMER = 0x%016llx\n", hmer);
if (hmi_evt)
hmi_evt->hmer = hmer;
if (hmer & SPR_HMER_PROC_RECV_DONE) {
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint64_t core_wof;
+
+ hmi_print_debug("Processor recovery occurred.");
+ if (!read_core_wof(chip_id, core_id, &core_wof)) {
+ int i;
+
+ prlog(PR_DEBUG, "Core WOF = 0x%016llx recovered error:\n", core_wof);
+ for (i = 0; i < ARRAY_SIZE(recoverable_bits); i++) {
+ if (core_wof & PPC_BIT(recoverable_bits[i].bit))
+ prlog(PR_DEBUG, "%s\n",
+ recoverable_bits[i].reason);
+ }
+ }
+
hmer &= ~SPR_HMER_PROC_RECV_DONE;
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
queue_hmi_event(hmi_evt, recover);
}
- hmi_print_debug("Processor recovery Done.");
}
if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
@@ -1180,7 +1236,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
mtspr(SPR_HMER, hmer);
hmi_exit();
/* Set the TB state looking at TFMR register before we head out. */
- this_cpu()->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
+ cpu->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
unlock(&hmi_lock);
return recover;
}