aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorFrederic Barrat <fbarrat@linux.ibm.com>2019-05-23 14:21:35 +0200
committerStewart Smith <stewart@linux.ibm.com>2019-06-04 13:34:58 +1000
commit2a7e5cb685b83b7d70018636ec95f81c7c60270a (patch)
tree70cacc43442aaa2793a5dc9787e2cc0b1cfcbced /core
parentce49505c2fafabc0fa4a4a5d1e34f6dd2dd7fb8f (diff)
downloadskiboot-2a7e5cb685b83b7d70018636ec95f81c7c60270a.zip
skiboot-2a7e5cb685b83b7d70018636ec95f81c7c60270a.tar.gz
skiboot-2a7e5cb685b83b7d70018636ec95f81c7c60270a.tar.bz2
opal/hmi: Report NPU2 checkstop reason
The NPU2 is currently not passing any information to Linux to explain the cause of an HMI. NPU2 has three Fault Isolation Registers (FIRs), and over 30 of those FIR bits are configured to raise an HMI by default.

We won't be able to fit all possible state in the 32-bit xstop_reason field of the HMI event, but we can still try to encode up to 4 HMI reasons.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'core')
-rw-r--r--core/hmi.c44
1 files changed, 44 insertions, 0 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 709f16b..5cde671 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -576,6 +576,46 @@ static bool phb_is_npu2(struct dt_node *dn)
dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex"));
}
/*
 * Record one encoded checkstop reason in the first free slot of
 * *xstop_reason.
 *
 * The 32-bit word is handled as four 8-bit slots, filled starting
 * from the most significant byte. A slot whose value is 0 is
 * considered free, so a reason code of 0 cannot be told apart from
 * an empty slot. If every slot is already in use, the reason is
 * silently dropped.
 *
 * Slots are selected with shifts rather than through a byte pointer
 * so that the resulting value does not depend on the byte order of
 * the host; on a big-endian host the layout is the same as filling
 * the word byte by byte in memory order.
 */
static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
{
	const int reason_bits = 8 * sizeof(reason);
	const int reason_count = sizeof(*xstop_reason) / sizeof(reason);
	int i, shift;

	for (i = 0; i < reason_count; i++) {
		/* Slot 0 is the most significant byte of the word */
		shift = reason_bits * (reason_count - 1 - i);
		if (((*xstop_reason >> shift) & 0xff) == 0) {
			*xstop_reason |= (uint32_t)reason << shift;
			break;
		}
	}
}
+
/*
 * Turn every bit set in one NPU2 FIR into a one-byte reason code and
 * hand each code to add_npu2_xstop_reason().
 *
 * There are three 64-bit FIRs, but the xstop reason field of the HMI
 * event is only 32 bits wide. Each reason is therefore packed into a
 * single byte:
 *   - upper 2 bits: the FIR number
 *   - lower 6 bits: the bit number within the FIR (0 -> 63)
 *
 * This leaves room for up to 4 reasons per HMI, should several bits
 * ever be raised at once.
 */
static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
				     uint64_t fir, int fir_number)
{
	uint64_t pending;
	uint8_t code;
	int pos;

	/* Consume the FIR one set bit at a time until none is left */
	for (pending = fir; pending != 0; pending ^= 1ULL << pos) {
		/*
		 * ilog2() is defined elsewhere; presumably it yields the
		 * index of the most significant set bit.
		 */
		pos = ilog2(pending);

		/* 63 - pos converts the bit index to IBM numbering */
		code = (fir_number << 6) | (63 - pos);
		add_npu2_xstop_reason(xstop_reason, code);
	}
}
+
static void find_npu2_checkstop_reason(int flat_chip_id,
struct OpalHMIEvent *hmi_evt,
uint64_t *out_flags)
@@ -592,6 +632,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
uint64_t npu2_fir_action0_addr;
uint64_t npu2_fir_action1_addr;
uint64_t fatal_errors;
+ uint32_t xstop_reason = 0;
int total_errors = 0;
const char *loc;
@@ -635,6 +676,8 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n",
loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1);
total_errors++;
+
+ encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i);
}
/* Can't do a fence yet, we are just logging fir information for now */
@@ -667,6 +710,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
hmi_evt->severity = OpalHMI_SEV_WARNING;
hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.xstop_reason = xstop_reason;
hmi_evt->u.xstop_error.u.chip_id = flat_chip_id;
/* Marking the event as recoverable so that we don't crash */