diff options
-rw-r--r-- | core/errorlog.c | 13 | ||||
-rw-r--r-- | core/opal.c | 12 | ||||
-rw-r--r-- | hw/fsp/fsp-surveillance.c | 11 | ||||
-rw-r--r-- | hw/fsp/fsp.c | 61 | ||||
-rw-r--r-- | include/errorlog.h | 13 | ||||
-rw-r--r-- | include/fsp.h | 6 |
6 files changed, 83 insertions, 33 deletions
diff --git a/core/errorlog.c b/core/errorlog.c index 179e09f..522dfcc 100644 --- a/core/errorlog.c +++ b/core/errorlog.c @@ -1,4 +1,4 @@ -/* Copyright 2013-2016 IBM Corp. +/* Copyright 2013-2017 IBM Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -196,7 +196,7 @@ void log_append_msg(struct errorlog *buf, const char *fmt, ...) log_append_data(buf, err_msg, strlen(err_msg)); } -void log_simple_error(struct opal_err_info *e_info, const char *fmt, ...) +uint32_t log_simple_error(struct opal_err_info *e_info, const char *fmt, ...) { struct errorlog *buf; va_list list; @@ -212,10 +212,13 @@ void log_simple_error(struct opal_err_info *e_info, const char *fmt, ...) buf = opal_elog_create(e_info, 0); if (buf == NULL) { prerror("ELOG: Error getting buffer to log error\n"); - } else { - log_append_data(buf, err_msg, strlen(err_msg)); - log_commit(buf); + return -1; } + + log_append_data(buf, err_msg, strlen(err_msg)); + log_commit(buf); + + return buf->plid; } int elog_init(void) diff --git a/core/opal.c b/core/opal.c index 73223b1..14357cc 100644 --- a/core/opal.c +++ b/core/opal.c @@ -30,6 +30,7 @@ #include <opal-msg.h> #include <timer.h> #include <elf-abi.h> +#include <errorlog.h> /* Pending events to signal via opal_poll_events */ uint64_t opal_pending_events; @@ -51,6 +52,13 @@ static uint64_t opal_dynamic_events; extern uint32_t attn_trigger; extern uint32_t hir_trigger; +/* We make this look like a Surveillance error, even though it really + * isn't one. + */ +DEFINE_LOG_ENTRY(OPAL_INJECTED_HIR, OPAL_MISC_ERR_EVT, OPAL_SURVEILLANCE, + OPAL_SURVEILLANCE_ERR, OPAL_PREDICTIVE_ERR_GENERAL, + OPAL_MISCELLANEOUS_INFO_ONLY); + void opal_table_init(void) { struct opal_table_entry *s = __opal_table_start; @@ -408,7 +416,9 @@ static int64_t opal_poll_events(__be64 *outstanding_event_mask) /* Test the host initiated reset */ if (hir_trigger == 0xdeadbeef) { - fsp_trigger_reset(); + uint32_t plid = log_simple_error(&e_info(OPAL_INJECTED_HIR), + "SURV: Injected HIR, initiating FSP R/R\n"); + fsp_trigger_reset(plid); hir_trigger = 0; } diff --git a/hw/fsp/fsp-surveillance.c b/hw/fsp/fsp-surveillance.c index d3e5c45..202b093 100644 --- a/hw/fsp/fsp-surveillance.c +++ b/hw/fsp/fsp-surveillance.c @@ -82,15 +82,12 @@ static void fsp_surv_check_timeout(void) * just go ahead and check timeouts. */ if (tb_compare(now, surv_ack_timer) == TB_AAFTERB) { - /* XXX: We should be logging a PEL to the host, assuming - * the FSP is dead, pending a R/R. - */ - log_simple_error(&e_info(OPAL_RC_SURVE_ACK), + uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_ACK), "SURV: Surv ACK timed out; initiating R/R\n"); /* Reset the pending trigger too */ fsp_surv_ack_pending = false; - fsp_trigger_reset(); + fsp_trigger_reset(plid); } return; @@ -149,10 +146,10 @@ static void fsp_surv_got_param(uint32_t param_id __unused, int err_len, void *data __unused) { if (err_len != 4) { - log_simple_error(&e_info(OPAL_RC_SURVE_STATUS), + uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_STATUS), "SURV: Error (%d) retrieving surv status; initiating R/R\n", err_len); - fsp_trigger_reset(); + fsp_trigger_reset(plid); return; } diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c index a0c5a78..162d9b4 100644 --- a/hw/fsp/fsp.c +++ b/hw/fsp/fsp.c @@ -40,7 +40,13 @@ #include <ccan/list/list.h> DEFINE_LOG_ENTRY(OPAL_RC_FSP_POLL_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_FSP, - OPAL_PLATFORM_FIRMWARE, OPAL_ERROR_PANIC, OPAL_NA); + OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA); + +DEFINE_LOG_ENTRY(OPAL_RC_FSP_MBOX_ERR, OPAL_PLATFORM_ERR_EVT, OPAL_FSP, + OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA); + +DEFINE_LOG_ENTRY(OPAL_RC_FSP_DISR_HIR_MASK, OPAL_PLATFORM_ERR_EVT, OPAL_FSP, + OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA); #define FSP_TRACE_MSG #define FSP_TRACE_EVENT @@ -545,9 +551,12 @@ static void __fsp_trigger_reset(void) fsp_prep_for_reset(fsp); } -void fsp_trigger_reset(void) +static uint32_t fsp_hir_reason_plid; + +void fsp_trigger_reset(uint32_t plid) { lock(&fsp_lock); + fsp_hir_reason_plid = plid; __fsp_trigger_reset(); unlock(&fsp_lock); } @@ -683,9 +692,11 @@ static void fsp_handle_errors(struct fsp *fsp) * quite rare. */ if (fsp->state == fsp_mbx_err) { - prerror("FSP #%d: Triggering HIR on mbx_err\n", - fsp->index); - fsp_trigger_reset(); + uint32_t plid; + plid = log_simple_error(&e_info(OPAL_RC_FSP_MBOX_ERR), + "FSP #%d: Triggering HIR on mbx_err\n", + fsp->index); + fsp_trigger_reset(plid); return; } @@ -736,16 +747,20 @@ static void fsp_handle_errors(struct fsp *fsp) * to trigger a HIR so it can try to recover via the DRCR route. */ if (disr & FSP_DISR_HIR_TRIGGER_MASK) { + const char *reason = "Unknown FSP_DISR_HIR_TRIGGER"; + uint32_t plid; fsp_trace_event(fsp, TRACE_FSP_EVT_SOFT_RR, disr, 0, 0, 0); if (disr & FSP_DISR_FSP_UNIT_CHECK) - prlog(PR_DEBUG, "FSP: DISR Unit Check set\n"); + reason = "DISR Unit Check set"; else if (disr & FSP_DISR_FSP_RUNTIME_TERM) - prlog(PR_DEBUG, "FSP: DISR Runtime Terminate set\n"); + reason = "DISR Runtime Terminate set"; else if (disr & FSP_DISR_FSP_FLASH_TERM) - prlog(PR_DEBUG, "FSP: DISR Flash Terminate set\n"); - prlog(PR_NOTICE, "FSP: Triggering host initiated reset" - " sequence\n"); + reason = "DISR Flash Terminate set"; + + plid = log_simple_error(&e_info(OPAL_RC_FSP_DISR_HIR_MASK), + "FSP: %s. Triggering host initiated " + "reset.", reason); /* Clear all interrupt conditions */ fsp_wreg(fsp, FSP_HDIR_REG, FSP_DBIRQ_ALL); @@ -753,7 +768,7 @@ static void fsp_handle_errors(struct fsp *fsp) /* Make sure this happened */ fsp_rreg(fsp, FSP_HDIR_REG); - fsp_trigger_reset(); + fsp_trigger_reset(plid); return; } @@ -1318,6 +1333,21 @@ static bool fsp_local_command(u32 cmd_sub_mod, struct fsp_msg *msg) } } return true; + case FSP_CMD_GET_HIR_PLID: + /* Get Platform Log Id with reason for Host Initiated Reset */ + prlog(PR_DEBUG, "FSP: Sending PLID 0x%x as HIR reason\n", + fsp_hir_reason_plid); + resp = fsp_mkmsg(FSP_RSP_GET_HIR_PLID, 1, fsp_hir_reason_plid); + if (!resp) + prerror("FSP: Failed to allocate GET_HIR_PLID response\n"); + else { + if (fsp_queue_msg(resp, fsp_freemsg)) { + fsp_freemsg(resp); + prerror("FSP: Failed to queue GET_HIR_PLID resp\n"); + } + } + fsp_hir_reason_plid = 0; + return true; } return false; } @@ -1340,7 +1370,7 @@ static void fsp_handle_command(struct fsp_msg *msg) cmd_sub_mod = (msg->word0 & 0xff) << 16; cmd_sub_mod |= (msg->word1 & 0xff) << 8; cmd_sub_mod |= (msg->word1 >> 8) & 0xff; - + /* Some commands are handled locally */ if (fsp_local_command(cmd_sub_mod, msg)) goto free; @@ -2148,9 +2178,10 @@ static void fsp_timeout_poll(void *data __unused) fsp_complete_msg(req); __fsp_trigger_reset(); unlock(&fsp_lock); - log_simple_error(&e_info(OPAL_RC_FSP_POLL_TIMEOUT), - "FSP: Response from FSP timed out, word0 = %x," - "word1 = %x state: %d\n", w0, w1, mstate); + fsp_hir_reason_plid = log_simple_error( + &e_info(OPAL_RC_FSP_POLL_TIMEOUT), + "FSP: Response from FSP timed out, word0 = %x," + "word1 = %x state: %d\n", w0, w1, mstate); } next_bit: cmdclass_resp_bitmask = cmdclass_resp_bitmask >> 1; diff --git a/include/errorlog.h b/include/errorlog.h index 247198b..e9d5ad8 100644 --- a/include/errorlog.h +++ b/include/errorlog.h @@ -259,6 +259,7 @@ enum opal_reasoncode { OPAL_RC_SURVE_INIT = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x10, OPAL_RC_SURVE_STATUS = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x11, OPAL_RC_SURVE_ACK = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x12, + OPAL_INJECTED_HIR = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x13, /* SYSPARAM */ OPAL_RC_SYSPARM_INIT = OPAL_SRC_COMPONENT_SYSPARAM | 0x10, OPAL_RC_SYSPARM_MSG = OPAL_SRC_COMPONENT_SYSPARAM | 0x11, @@ -313,8 +314,9 @@ enum opal_reasoncode { OPAL_RC_SLW_GET = OPAL_SRC_COMPONENT_SLW | 0x12, OPAL_RC_SLW_REG = OPAL_SRC_COMPONENT_SLW | 0x13, /* FSP */ - OPAL_RC_FSP_POLL_TIMEOUT - = OPAL_SRC_COMPONENT_FSP | 0x10, + OPAL_RC_FSP_POLL_TIMEOUT = OPAL_SRC_COMPONENT_FSP | 0x10, + OPAL_RC_FSP_MBOX_ERR = OPAL_SRC_COMPONENT_FSP | 0x11, + OPAL_RC_FSP_DISR_HIR_MASK = OPAL_SRC_COMPONENT_FSP | 0x12, /* I2C */ OPAL_RC_I2C_INIT = OPAL_SRC_COMPONENT_I2C | 0X10, OPAL_RC_I2C_START_REQ = OPAL_SRC_COMPONENT_I2C | 0X11, @@ -339,9 +341,12 @@ severity, subtype) static struct opal_err_info err_##reason = \ /* This is wrapper around the error log function, which creates * and commits the error to FSP. - * Used for simple error logging + * Used for simple error logging. + * Returns a Log ID, if an error involves a service processor needing + * to be kicked, this logid can be sent to the service processor explaining + * *why* we kicked it. Log Id = -1 on error. */ -void log_simple_error(struct opal_err_info *e_info, +uint32_t log_simple_error(struct opal_err_info *e_info, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); #define e_info(reason_code) err_##reason_code diff --git a/include/fsp.h b/include/fsp.h index f75b6ad..e7f6a7b 100644 --- a/include/fsp.h +++ b/include/fsp.h @@ -368,6 +368,10 @@ #define FSP_CMD_DEEP_REBOOT 0x1ce4e04 /* HV->FSP: Deep IPL */ #define FSP_CMD_INIT_DPO 0x0ce5b00 /* FSP->HV: Initialize Delayed Power Off */ #define FSP_RSP_INIT_DPO 0x0cedb00 /* HV->FSP: Response for DPO init command */ +#define FSP_CMD_GET_HIR_PLID 0x0ce0900 /* FSP->HV: Get Platform Log ID with + * reason for Host Initiated Reset. + */ +#define FSP_RSP_GET_HIR_PLID 0x0ce8900 /* HV->FSP: Reply with PLID */ #define FSP_CMD_PANELSTATUS 0x0ce5c00 /* FSP->HV */ #define FSP_CMD_PANELSTATUS_EX1 0x0ce5c02 /* FSP->HV */ #define FSP_CMD_PANELSTATUS_EX2 0x0ce5c03 /* FSP->HV */ @@ -808,7 +812,7 @@ extern void fsp_ipmi_init(void); /* Reset/Reload */ extern void fsp_reinit_fsp(void); -extern void fsp_trigger_reset(void); +extern void fsp_trigger_reset(uint32_t plid); extern void fsp_reset_links(void); extern bool fsp_in_rr(void); |