aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVasant Hegde <hegdevasant@linux.vnet.ibm.com>2017-06-09 22:49:05 +0530
committerStewart Smith <stewart@linux.vnet.ibm.com>2017-08-18 13:30:38 +1000
commit52093328057c2d87546c5b21f143282c96c8530e (patch)
tree87afb361c28cc7bd96aef3ed67fcf64d8bd7f0ac
parent6e8c934e6eb71c0a8f8d465846260bf38a352b61 (diff)
downloadskiboot-52093328057c2d87546c5b21f143282c96c8530e.zip
skiboot-52093328057c2d87546c5b21f143282c96c8530e.tar.gz
skiboot-52093328057c2d87546c5b21f143282c96c8530e.tar.bz2
FSP/CONSOLE: Workaround for unresponsive ipmi daemon
We use TCE mapped area to write data to console. Console header (fsp_serbuf_hdr) is modified by both FSP and OPAL (OPAL updates next_in pointer in fsp_serbuf_hdr and FSP updates next_out pointer). Kernel makes opal_console_write() OPAL call to write data to console. OPAL writes data to TCE mapped area and sends MBOX command to FSP. If our console becomes full and we have data to write to console, we keep on waiting until FSP reads data. In some corner cases, where FSP is active but not responding to console MBOX message (due to buggy IPMI) and we have heavy console write happening from kernel, then eventually our console buffer becomes full. At this point OPAL starts sending OPAL_BUSY_EVENT to kernel. Kernel will keep on retrying. This is creating kernel soft lockups. In some extreme cases, when every CPU is trying to write to console, user will not be able to ssh and thinks system is hung. If we reset FSP or restart IPMI daemon on FSP, system recovers and everything becomes normal. This patch adds a workaround for the above issue by returning OPAL_HARDWARE when console is full. Side effect of this patch is that we may end up dropping the latest console data. But it is better to drop console data than to hang the system. Alternative approach is to drop old data from console buffer to make space for new data. But in normal condition only FSP can update 'next_out' pointer and if we touch that pointer, it may introduce some other race conditions. Hence we decided to just drop the new console write request. Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com> Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com> (cherry picked from commit c8a7535f3539c79955645e6b3714b367a994b1e9) Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
-rw-r--r--hw/fsp/fsp-console.c18
-rw-r--r--include/errorlog.h4
2 files changed, 21 insertions, 1 deletions
diff --git a/hw/fsp/fsp-console.c b/hw/fsp/fsp-console.c
index 2a25e1c..ee8090e 100644
--- a/hw/fsp/fsp-console.c
+++ b/hw/fsp/fsp-console.c
@@ -26,6 +26,11 @@
#include <timebase.h>
#include <device.h>
#include <fsp-sysparam.h>
+#include <errorlog.h>
+
+DEFINE_LOG_ENTRY(OPAL_RC_CONSOLE_HANG, OPAL_PLATFORM_ERR_EVT, OPAL_CONSOLE,
+ OPAL_PLATFORM_FIRMWARE,
+ OPAL_PREDICTIVE_ERR_GENERAL, OPAL_NA);
struct fsp_serbuf_hdr {
u16 partition_id;
@@ -608,7 +613,18 @@ static int64_t fsp_console_write(int64_t term_number, int64_t *length,
*length = written;
unlock(&fsp_con_lock);
- return written ? OPAL_SUCCESS : OPAL_BUSY_EVENT;
+ if (written)
+ return OPAL_SUCCESS;
+
+ /*
+ * FSP is still active but not reading console data. Hence
+ * our console buffer became full. Most likely IPMI daemon
+ * on FSP is buggy. Lets log error and return OPAL_HARDWARE
+ * to payload (Linux).
+ */
+ log_simple_error(&e_info(OPAL_RC_CONSOLE_HANG), "FSPCON: Console "
+ "buffer is full, dropping console data\n");
+ return OPAL_HARDWARE;
}
static int64_t fsp_console_write_buffer_space(int64_t term_number,
diff --git a/include/errorlog.h b/include/errorlog.h
index f593636..7d8b5b7 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -219,6 +219,7 @@ struct opal_err_info {
#define OPAL_MF 0x1700
#define OPAL_DU 0x1800
#define OPAL_LE 0x1900
+#define OPAL_SRC_COMPONENT_CONSOLE 0x1b00
#define OPAL_SE 0x2000
#define OPAL_SL 0x2100
#define OPAL_FP 0x2200
@@ -330,6 +331,9 @@ enum opal_reasoncode {
/* Platform error */
OPAL_RC_ABNORMAL_REBOOT = OPAL_CE | 0x10,
+
+/* FSP console */
+ OPAL_RC_CONSOLE_HANG = OPAL_SRC_COMPONENT_CONSOLE | 0x10,
};
#define DEFINE_LOG_ENTRY(reason, type, id, subsys, \