about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- core/errorlog.c 13
-rw-r--r-- core/opal.c 12
-rw-r--r-- hw/fsp/fsp-surveillance.c 11
-rw-r--r-- hw/fsp/fsp.c 61
-rw-r--r-- include/errorlog.h 13
-rw-r--r-- include/fsp.h 6
6 files changed, 83 insertions, 33 deletions
diff --git a/core/errorlog.c b/core/errorlog.c
index 179e09f..522dfcc 100644
--- a/core/errorlog.c
+++ b/core/errorlog.c
@@ -1,4 +1,4 @@
-/* Copyright 2013-2016 IBM Corp.
+/* Copyright 2013-2017 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -196,7 +196,7 @@ void log_append_msg(struct errorlog *buf, const char *fmt, ...)
log_append_data(buf, err_msg, strlen(err_msg));
}
-void log_simple_error(struct opal_err_info *e_info, const char *fmt, ...)
+uint32_t log_simple_error(struct opal_err_info *e_info, const char *fmt, ...)
{
struct errorlog *buf;
va_list list;
@@ -212,10 +212,13 @@ void log_simple_error(struct opal_err_info *e_info, const char *fmt, ...)
buf = opal_elog_create(e_info, 0);
if (buf == NULL) {
prerror("ELOG: Error getting buffer to log error\n");
- } else {
- log_append_data(buf, err_msg, strlen(err_msg));
- log_commit(buf);
+ return -1;
}
+
+ log_append_data(buf, err_msg, strlen(err_msg));
+ log_commit(buf);
+
+ return buf->plid;
}
int elog_init(void)
diff --git a/core/opal.c b/core/opal.c
index 73223b1..14357cc 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -30,6 +30,7 @@
#include <opal-msg.h>
#include <timer.h>
#include <elf-abi.h>
+#include <errorlog.h>
/* Pending events to signal via opal_poll_events */
uint64_t opal_pending_events;
@@ -51,6 +52,13 @@ static uint64_t opal_dynamic_events;
extern uint32_t attn_trigger;
extern uint32_t hir_trigger;
+/* We make this look like a Surveillance error, even though it really
+ * isn't one.
+ */
+DEFINE_LOG_ENTRY(OPAL_INJECTED_HIR, OPAL_MISC_ERR_EVT, OPAL_SURVEILLANCE,
+ OPAL_SURVEILLANCE_ERR, OPAL_PREDICTIVE_ERR_GENERAL,
+ OPAL_MISCELLANEOUS_INFO_ONLY);
+
void opal_table_init(void)
{
struct opal_table_entry *s = __opal_table_start;
@@ -408,7 +416,9 @@ static int64_t opal_poll_events(__be64 *outstanding_event_mask)
/* Test the host initiated reset */
if (hir_trigger == 0xdeadbeef) {
- fsp_trigger_reset();
+ uint32_t plid = log_simple_error(&e_info(OPAL_INJECTED_HIR),
+ "SURV: Injected HIR, initiating FSP R/R\n");
+ fsp_trigger_reset(plid);
hir_trigger = 0;
}
diff --git a/hw/fsp/fsp-surveillance.c b/hw/fsp/fsp-surveillance.c
index d3e5c45..202b093 100644
--- a/hw/fsp/fsp-surveillance.c
+++ b/hw/fsp/fsp-surveillance.c
@@ -82,15 +82,12 @@ static void fsp_surv_check_timeout(void)
* just go ahead and check timeouts.
*/
if (tb_compare(now, surv_ack_timer) == TB_AAFTERB) {
- /* XXX: We should be logging a PEL to the host, assuming
- * the FSP is dead, pending a R/R.
- */
- log_simple_error(&e_info(OPAL_RC_SURVE_ACK),
+ uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_ACK),
"SURV: Surv ACK timed out; initiating R/R\n");
/* Reset the pending trigger too */
fsp_surv_ack_pending = false;
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
}
return;
@@ -149,10 +146,10 @@ static void fsp_surv_got_param(uint32_t param_id __unused, int err_len,
void *data __unused)
{
if (err_len != 4) {
- log_simple_error(&e_info(OPAL_RC_SURVE_STATUS),
+ uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_STATUS),
"SURV: Error (%d) retrieving surv status; initiating R/R\n",
err_len);
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
return;
}
diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index a0c5a78..162d9b4 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -40,7 +40,13 @@
#include <ccan/list/list.h>
DEFINE_LOG_ENTRY(OPAL_RC_FSP_POLL_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
- OPAL_PLATFORM_FIRMWARE, OPAL_ERROR_PANIC, OPAL_NA);
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
+
+DEFINE_LOG_ENTRY(OPAL_RC_FSP_MBOX_ERR, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
+
+DEFINE_LOG_ENTRY(OPAL_RC_FSP_DISR_HIR_MASK, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
#define FSP_TRACE_MSG
#define FSP_TRACE_EVENT
@@ -545,9 +551,12 @@ static void __fsp_trigger_reset(void)
fsp_prep_for_reset(fsp);
}
-void fsp_trigger_reset(void)
+static uint32_t fsp_hir_reason_plid;
+
+void fsp_trigger_reset(uint32_t plid)
{
lock(&fsp_lock);
+ fsp_hir_reason_plid = plid;
__fsp_trigger_reset();
unlock(&fsp_lock);
}
@@ -683,9 +692,11 @@ static void fsp_handle_errors(struct fsp *fsp)
* quite rare.
*/
if (fsp->state == fsp_mbx_err) {
- prerror("FSP #%d: Triggering HIR on mbx_err\n",
- fsp->index);
- fsp_trigger_reset();
+ uint32_t plid;
+ plid = log_simple_error(&e_info(OPAL_RC_FSP_MBOX_ERR),
+ "FSP #%d: Triggering HIR on mbx_err\n",
+ fsp->index);
+ fsp_trigger_reset(plid);
return;
}
@@ -736,16 +747,20 @@ static void fsp_handle_errors(struct fsp *fsp)
* to trigger a HIR so it can try to recover via the DRCR route.
*/
if (disr & FSP_DISR_HIR_TRIGGER_MASK) {
+ const char *reason = "Unknown FSP_DISR_HIR_TRIGGER";
+ uint32_t plid;
fsp_trace_event(fsp, TRACE_FSP_EVT_SOFT_RR, disr, 0, 0, 0);
if (disr & FSP_DISR_FSP_UNIT_CHECK)
- prlog(PR_DEBUG, "FSP: DISR Unit Check set\n");
+ reason = "DISR Unit Check set";
else if (disr & FSP_DISR_FSP_RUNTIME_TERM)
- prlog(PR_DEBUG, "FSP: DISR Runtime Terminate set\n");
+ reason = "DISR Runtime Terminate set";
else if (disr & FSP_DISR_FSP_FLASH_TERM)
- prlog(PR_DEBUG, "FSP: DISR Flash Terminate set\n");
- prlog(PR_NOTICE, "FSP: Triggering host initiated reset"
- " sequence\n");
+ reason = "DISR Flash Terminate set";
+
+ plid = log_simple_error(&e_info(OPAL_RC_FSP_DISR_HIR_MASK),
+ "FSP: %s. Triggering host initiated "
+ "reset.", reason);
/* Clear all interrupt conditions */
fsp_wreg(fsp, FSP_HDIR_REG, FSP_DBIRQ_ALL);
@@ -753,7 +768,7 @@ static void fsp_handle_errors(struct fsp *fsp)
/* Make sure this happened */
fsp_rreg(fsp, FSP_HDIR_REG);
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
return;
}
@@ -1318,6 +1333,21 @@ static bool fsp_local_command(u32 cmd_sub_mod, struct fsp_msg *msg)
}
}
return true;
+ case FSP_CMD_GET_HIR_PLID:
+ /* Get Platform Log Id with reason for Host Initiated Reset */
+ prlog(PR_DEBUG, "FSP: Sending PLID 0x%x as HIR reason\n",
+ fsp_hir_reason_plid);
+ resp = fsp_mkmsg(FSP_RSP_GET_HIR_PLID, 1, fsp_hir_reason_plid);
+ if (!resp)
+ prerror("FSP: Failed to allocate GET_HIR_PLID response\n");
+ else {
+ if (fsp_queue_msg(resp, fsp_freemsg)) {
+ fsp_freemsg(resp);
+ prerror("FSP: Failed to queue GET_HIR_PLID resp\n");
+ }
+ }
+ fsp_hir_reason_plid = 0;
+ return true;
}
return false;
}
@@ -1340,7 +1370,7 @@ static void fsp_handle_command(struct fsp_msg *msg)
cmd_sub_mod = (msg->word0 & 0xff) << 16;
cmd_sub_mod |= (msg->word1 & 0xff) << 8;
cmd_sub_mod |= (msg->word1 >> 8) & 0xff;
-
+
/* Some commands are handled locally */
if (fsp_local_command(cmd_sub_mod, msg))
goto free;
@@ -2148,9 +2178,10 @@ static void fsp_timeout_poll(void *data __unused)
fsp_complete_msg(req);
__fsp_trigger_reset();
unlock(&fsp_lock);
- log_simple_error(&e_info(OPAL_RC_FSP_POLL_TIMEOUT),
- "FSP: Response from FSP timed out, word0 = %x,"
- "word1 = %x state: %d\n", w0, w1, mstate);
+ fsp_hir_reason_plid = log_simple_error(
+ &e_info(OPAL_RC_FSP_POLL_TIMEOUT),
+ "FSP: Response from FSP timed out, word0 = %x,"
+ "word1 = %x state: %d\n", w0, w1, mstate);
}
next_bit:
cmdclass_resp_bitmask = cmdclass_resp_bitmask >> 1;
diff --git a/include/errorlog.h b/include/errorlog.h
index 247198b..e9d5ad8 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -259,6 +259,7 @@ enum opal_reasoncode {
OPAL_RC_SURVE_INIT = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x10,
OPAL_RC_SURVE_STATUS = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x11,
OPAL_RC_SURVE_ACK = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x12,
+ OPAL_INJECTED_HIR = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x13,
/* SYSPARAM */
OPAL_RC_SYSPARM_INIT = OPAL_SRC_COMPONENT_SYSPARAM | 0x10,
OPAL_RC_SYSPARM_MSG = OPAL_SRC_COMPONENT_SYSPARAM | 0x11,
@@ -313,8 +314,9 @@ enum opal_reasoncode {
OPAL_RC_SLW_GET = OPAL_SRC_COMPONENT_SLW | 0x12,
OPAL_RC_SLW_REG = OPAL_SRC_COMPONENT_SLW | 0x13,
/* FSP */
- OPAL_RC_FSP_POLL_TIMEOUT
- = OPAL_SRC_COMPONENT_FSP | 0x10,
+ OPAL_RC_FSP_POLL_TIMEOUT = OPAL_SRC_COMPONENT_FSP | 0x10,
+ OPAL_RC_FSP_MBOX_ERR = OPAL_SRC_COMPONENT_FSP | 0x11,
+ OPAL_RC_FSP_DISR_HIR_MASK = OPAL_SRC_COMPONENT_FSP | 0x12,
/* I2C */
OPAL_RC_I2C_INIT = OPAL_SRC_COMPONENT_I2C | 0X10,
OPAL_RC_I2C_START_REQ = OPAL_SRC_COMPONENT_I2C | 0X11,
@@ -339,9 +341,12 @@ severity, subtype) static struct opal_err_info err_##reason = \
/* This is wrapper around the error log function, which creates
* and commits the error to FSP.
- * Used for simple error logging
+ * Used for simple error logging.
+ * Returns a Log ID. If an error involves a service processor needing
+ * to be kicked, this Log ID can be sent to the service processor to explain
+ * *why* we kicked it. Log ID = -1 (0xffffffff as a uint32_t) on error.
*/
-void log_simple_error(struct opal_err_info *e_info,
+uint32_t log_simple_error(struct opal_err_info *e_info,
const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
#define e_info(reason_code) err_##reason_code
diff --git a/include/fsp.h b/include/fsp.h
index f75b6ad..e7f6a7b 100644
--- a/include/fsp.h
+++ b/include/fsp.h
@@ -368,6 +368,10 @@
#define FSP_CMD_DEEP_REBOOT 0x1ce4e04 /* HV->FSP: Deep IPL */
#define FSP_CMD_INIT_DPO 0x0ce5b00 /* FSP->HV: Initialize Delayed Power Off */
#define FSP_RSP_INIT_DPO 0x0cedb00 /* HV->FSP: Response for DPO init command */
+#define FSP_CMD_GET_HIR_PLID 0x0ce0900 /* FSP->HV: Get Platform Log ID with
+ * reason for Host Initiated Reset.
+ */
+#define FSP_RSP_GET_HIR_PLID 0x0ce8900 /* HV->FSP: Reply with PLID */
#define FSP_CMD_PANELSTATUS 0x0ce5c00 /* FSP->HV */
#define FSP_CMD_PANELSTATUS_EX1 0x0ce5c02 /* FSP->HV */
#define FSP_CMD_PANELSTATUS_EX2 0x0ce5c03 /* FSP->HV */
@@ -808,7 +812,7 @@ extern void fsp_ipmi_init(void);
/* Reset/Reload */
extern void fsp_reinit_fsp(void);
-extern void fsp_trigger_reset(void);
+extern void fsp_trigger_reset(uint32_t plid);
extern void fsp_reset_links(void);
extern bool fsp_in_rr(void);