aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStewart Smith <stewart@linux.vnet.ibm.com>2017-05-05 15:55:28 +1000
committerStewart Smith <stewart@linux.vnet.ibm.com>2017-05-10 14:32:17 +1000
commitf3a5741408a11be6992cf8779f2eae10b08c020a (patch)
tree568e079d35f41cab7cbe0e3d7eba1467d4122424 /hw
parentd55194c5d9ada77eee2c9a69814708304f34d334 (diff)
downloadskiboot-f3a5741408a11be6992cf8779f2eae10b08c020a.zip
skiboot-f3a5741408a11be6992cf8779f2eae10b08c020a.tar.gz
skiboot-f3a5741408a11be6992cf8779f2eae10b08c020a.tar.bz2
FSP: Notify FSP of Platform Log ID after Host Initiated Reset Reload
Triggering a Host Initiated Reset (when the host detects the FSP has gone out to lunch and should be rebooted) would cause "Unknown Command" messages to appear in the OPAL log. This patch implements handling for those messages. How to trigger FSP RR(HIR): $ putmemproc 300000f8 0x00000000deadbeef s1 k0:n0:s0:p00 ecmd_ppc putmemproc 300000f8 0x00000000deadbeef Log showing unknown command: / # cat /sys/firmware/opal/msglog | grep -i ,3 [ 110.232114723,3] FSP: fsp_trigger_reset() entry [ 188.431793837,3] FSP #0: Link down, starting R&R [ 464.109239162,3] FSP #0: Got XUP with no pending message ! [ 466.340598554,3] FSP-DPO: Unknown command 0xce0900 [ 466.340600126,3] FSP: Unhandled message ce0900 The message we need to handle is "Get PLID after host initiated FipS reset/reload". When the FSP comes back from HIR, it asks "hey, so, which error log explains why you rebooted me?". So, we tell it. Reported-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/fsp/fsp-surveillance.c11
-rw-r--r--hw/fsp/fsp.c61
2 files changed, 50 insertions, 22 deletions
diff --git a/hw/fsp/fsp-surveillance.c b/hw/fsp/fsp-surveillance.c
index d3e5c45..202b093 100644
--- a/hw/fsp/fsp-surveillance.c
+++ b/hw/fsp/fsp-surveillance.c
@@ -82,15 +82,12 @@ static void fsp_surv_check_timeout(void)
* just go ahead and check timeouts.
*/
if (tb_compare(now, surv_ack_timer) == TB_AAFTERB) {
- /* XXX: We should be logging a PEL to the host, assuming
- * the FSP is dead, pending a R/R.
- */
- log_simple_error(&e_info(OPAL_RC_SURVE_ACK),
+ uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_ACK),
"SURV: Surv ACK timed out; initiating R/R\n");
/* Reset the pending trigger too */
fsp_surv_ack_pending = false;
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
}
return;
@@ -149,10 +146,10 @@ static void fsp_surv_got_param(uint32_t param_id __unused, int err_len,
void *data __unused)
{
if (err_len != 4) {
- log_simple_error(&e_info(OPAL_RC_SURVE_STATUS),
+ uint32_t plid = log_simple_error(&e_info(OPAL_RC_SURVE_STATUS),
"SURV: Error (%d) retrieving surv status; initiating R/R\n",
err_len);
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
return;
}
diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index a0c5a78..162d9b4 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -40,7 +40,13 @@
#include <ccan/list/list.h>
DEFINE_LOG_ENTRY(OPAL_RC_FSP_POLL_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
- OPAL_PLATFORM_FIRMWARE, OPAL_ERROR_PANIC, OPAL_NA);
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
+
+DEFINE_LOG_ENTRY(OPAL_RC_FSP_MBOX_ERR, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
+
+DEFINE_LOG_ENTRY(OPAL_RC_FSP_DISR_HIR_MASK, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
+ OPAL_PLATFORM_FIRMWARE, OPAL_RECOVERED_ERR_GENERAL, OPAL_NA);
#define FSP_TRACE_MSG
#define FSP_TRACE_EVENT
@@ -545,9 +551,12 @@ static void __fsp_trigger_reset(void)
fsp_prep_for_reset(fsp);
}
-void fsp_trigger_reset(void)
+static uint32_t fsp_hir_reason_plid;
+
+void fsp_trigger_reset(uint32_t plid)
{
lock(&fsp_lock);
+ fsp_hir_reason_plid = plid;
__fsp_trigger_reset();
unlock(&fsp_lock);
}
@@ -683,9 +692,11 @@ static void fsp_handle_errors(struct fsp *fsp)
* quite rare.
*/
if (fsp->state == fsp_mbx_err) {
- prerror("FSP #%d: Triggering HIR on mbx_err\n",
- fsp->index);
- fsp_trigger_reset();
+ uint32_t plid;
+ plid = log_simple_error(&e_info(OPAL_RC_FSP_MBOX_ERR),
+ "FSP #%d: Triggering HIR on mbx_err\n",
+ fsp->index);
+ fsp_trigger_reset(plid);
return;
}
@@ -736,16 +747,20 @@ static void fsp_handle_errors(struct fsp *fsp)
* to trigger a HIR so it can try to recover via the DRCR route.
*/
if (disr & FSP_DISR_HIR_TRIGGER_MASK) {
+ const char *reason = "Unknown FSP_DISR_HIR_TRIGGER";
+ uint32_t plid;
fsp_trace_event(fsp, TRACE_FSP_EVT_SOFT_RR, disr, 0, 0, 0);
if (disr & FSP_DISR_FSP_UNIT_CHECK)
- prlog(PR_DEBUG, "FSP: DISR Unit Check set\n");
+ reason = "DISR Unit Check set";
else if (disr & FSP_DISR_FSP_RUNTIME_TERM)
- prlog(PR_DEBUG, "FSP: DISR Runtime Terminate set\n");
+ reason = "DISR Runtime Terminate set";
else if (disr & FSP_DISR_FSP_FLASH_TERM)
- prlog(PR_DEBUG, "FSP: DISR Flash Terminate set\n");
- prlog(PR_NOTICE, "FSP: Triggering host initiated reset"
- " sequence\n");
+ reason = "DISR Flash Terminate set";
+
+ plid = log_simple_error(&e_info(OPAL_RC_FSP_DISR_HIR_MASK),
+ "FSP: %s. Triggering host initiated "
+ "reset.", reason);
/* Clear all interrupt conditions */
fsp_wreg(fsp, FSP_HDIR_REG, FSP_DBIRQ_ALL);
@@ -753,7 +768,7 @@ static void fsp_handle_errors(struct fsp *fsp)
/* Make sure this happened */
fsp_rreg(fsp, FSP_HDIR_REG);
- fsp_trigger_reset();
+ fsp_trigger_reset(plid);
return;
}
@@ -1318,6 +1333,21 @@ static bool fsp_local_command(u32 cmd_sub_mod, struct fsp_msg *msg)
}
}
return true;
+ case FSP_CMD_GET_HIR_PLID:
+ /* Get Platform Log Id with reason for Host Initiated Reset */
+ prlog(PR_DEBUG, "FSP: Sending PLID 0x%x as HIR reason\n",
+ fsp_hir_reason_plid);
+ resp = fsp_mkmsg(FSP_RSP_GET_HIR_PLID, 1, fsp_hir_reason_plid);
+ if (!resp)
+ prerror("FSP: Failed to allocate GET_HIR_PLID response\n");
+ else {
+ if (fsp_queue_msg(resp, fsp_freemsg)) {
+ fsp_freemsg(resp);
+ prerror("FSP: Failed to queue GET_HIR_PLID resp\n");
+ }
+ }
+ fsp_hir_reason_plid = 0;
+ return true;
}
return false;
}
@@ -1340,7 +1370,7 @@ static void fsp_handle_command(struct fsp_msg *msg)
cmd_sub_mod = (msg->word0 & 0xff) << 16;
cmd_sub_mod |= (msg->word1 & 0xff) << 8;
cmd_sub_mod |= (msg->word1 >> 8) & 0xff;
-
+
/* Some commands are handled locally */
if (fsp_local_command(cmd_sub_mod, msg))
goto free;
@@ -2148,9 +2178,10 @@ static void fsp_timeout_poll(void *data __unused)
fsp_complete_msg(req);
__fsp_trigger_reset();
unlock(&fsp_lock);
- log_simple_error(&e_info(OPAL_RC_FSP_POLL_TIMEOUT),
- "FSP: Response from FSP timed out, word0 = %x,"
- "word1 = %x state: %d\n", w0, w1, mstate);
+ fsp_hir_reason_plid = log_simple_error(
+ &e_info(OPAL_RC_FSP_POLL_TIMEOUT),
+ "FSP: Response from FSP timed out, word0 = %x,"
+ "word1 = %x state: %d\n", w0, w1, mstate);
}
next_bit:
cmdclass_resp_bitmask = cmdclass_resp_bitmask >> 1;