about | summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r-- core/hmi.c 276
1 files changed, 132 insertions, 144 deletions
diff --git a/core/hmi.c b/core/hmi.c
index 9363cc5..ce5abd7 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -19,8 +19,10 @@
#include <pci.h>
#include <cpu.h>
#include <chip.h>
+#include <pau-regs.h>
#include <npu-regs.h>
#include <npu2-regs.h>
+#include <pau.h>
#include <npu2.h>
#include <npu.h>
#include <capp.h>
@@ -717,13 +719,7 @@ static void find_nx_checkstop_reason(int flat_chip_id,
queue_hmi_event(hmi_evt, 0, out_flags);
}
-static bool phb_is_npu2(struct dt_node *dn)
-{
- return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
- dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex"));
-}
-
-static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
+static void add_npu_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
{
int i, reason_count;
uint8_t *ptr;
@@ -739,8 +735,8 @@ static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
}
}
-static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
- uint64_t fir, int fir_number)
+static void encode_npu_xstop_reason(uint32_t *xstop_reason,
+ uint64_t fir, int fir_number)
{
int bit;
uint8_t reason;
@@ -758,114 +754,125 @@ static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
bit = ilog2(fir);
reason = fir_number << 6;
reason |= (63 - bit); // IBM numbering
- add_npu2_xstop_reason(xstop_reason, reason);
+ add_npu_xstop_reason(xstop_reason, reason);
fir ^= 1ULL << bit;
}
}
-static void find_npu2_checkstop_reason(int flat_chip_id,
- struct OpalHMIEvent *hmi_evt,
- uint64_t *out_flags)
+static bool npu_fir_errors(struct phb *phb, int flat_chip_id,
+ uint32_t *xstop_reason)
{
- struct phb *phb;
- int i;
- bool npu2_hmi_verbose = false, found = false;
- uint64_t npu2_fir;
- uint64_t npu2_fir_mask;
- uint64_t npu2_fir_action0;
- uint64_t npu2_fir_action1;
- uint64_t npu2_fir_addr;
- uint64_t npu2_fir_mask_addr;
- uint64_t npu2_fir_action0_addr;
- uint64_t npu2_fir_action1_addr;
+ uint64_t fir, fir_mask;
+ uint64_t fir_action0, fir_action1;
+ uint64_t fir_reg, fir_mask_reg;
+ uint64_t fir_action0_reg, fir_action1_reg;
uint64_t fatal_errors;
- uint32_t xstop_reason = 0;
- int total_errors = 0;
+ uint64_t xscom_base;
+ bool fir_errors = false; /* set once any FIR of this PHB reports a fatal error; returned to the caller */
+ int fir_regs;
const char *loc;
-
- /* NPU2 only */
- if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9)
- return;
-
- /* Find the NPU on the chip associated with the HMI. */
- for_each_phb(phb) {
- /* NOTE: if a chip ever has >1 NPU this will need adjusting */
- if (phb_is_npu2(phb->dt_node) &&
- (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
- found = true;
- break;
+ struct npu *npu;
+ struct npu2 *npu2 = NULL;
+ struct npu2_dev *dev;
+ struct pau *pau = NULL; /* NULL-initialised like npu2: only assigned in the PAU case, read after the loop */
+
+ fir_regs = (phb->phb_type == phb_type_pcie_v3) ? 1 : 3; /* one FIR register set for the pcie_v3 (struct npu) case, three otherwise */
+
+ for (int i = 0; i < fir_regs; i++) { /* int: compared with fir_regs, printed with %d, passed as int fir_number */
+ switch (phb->phb_type) {
+ case phb_type_pcie_v3:
+ fir_reg = NX_FIR;
+ fir_mask_reg = NX_FIR_MASK;
+ fir_action0_reg = NX_FIR_ACTION0;
+ fir_action1_reg = NX_FIR_ACTION1;
+
+ npu = phb_to_npu(phb);
+ if (npu != NULL)
+ xscom_base = npu->at_xscom;
+ else
+ continue;
+ break;
+ case phb_type_npu_v2:
+ fir_reg = NPU2_FIR(i);
+ fir_mask_reg = NPU2_FIR_MASK(i);
+ fir_action0_reg = NPU2_FIR_ACTION0(i);
+ fir_action1_reg = NPU2_FIR_ACTION1(i);
+ npu2 = phb_to_npu2_nvlink(phb);
+ xscom_base = npu2->xscom_base;
+ break;
+ case phb_type_npu_v2_opencapi:
+ fir_reg = NPU2_FIR(i);
+ fir_mask_reg = NPU2_FIR_MASK(i);
+ fir_action0_reg = NPU2_FIR_ACTION0(i);
+ fir_action1_reg = NPU2_FIR_ACTION1(i);
+ dev = phb_to_npu2_dev_ocapi(phb);
+ npu2 = dev->npu;
+ xscom_base = npu2->xscom_base;
+ break;
+ case phb_type_pau_opencapi:
+ fir_reg = PAU_FIR(i);
+ fir_mask_reg = PAU_FIR_MASK(i);
+ fir_action0_reg = PAU_FIR_ACTION0(i);
+ fir_action1_reg = PAU_FIR_ACTION1(i);
+ pau = ((struct pau_dev *)(pau_phb_to_opencapi_dev(phb)))->pau;
+ xscom_base = pau->xscom_base;
+ break;
+ default:
+ continue;
}
- }
-
- /* If we didn't find a NPU on the chip, it's not our checkstop. */
- if (!found)
- return;
- npu2_fir_addr = NPU2_FIR_REGISTER_0;
- npu2_fir_mask_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_MASK_OFFSET;
- npu2_fir_action0_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION0_OFFSET;
- npu2_fir_action1_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION1_OFFSET;
-
- for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
- /* Read all the registers necessary to find a checkstop condition. */
- if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) ||
- xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) ||
- xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) ||
- xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) {
- prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i);
+ if (xscom_read(flat_chip_id, xscom_base + fir_reg, &fir) ||
+ xscom_read(flat_chip_id, xscom_base + fir_mask_reg, &fir_mask) ||
+ xscom_read(flat_chip_id, xscom_base + fir_action0_reg, &fir_action0) ||
+ xscom_read(flat_chip_id, xscom_base + fir_action1_reg, &fir_action1)) {
+ prerror("HMI: Couldn't read NPU/PAU FIR register%d with XSCOM\n", i);
continue;
}
- fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
+ fatal_errors = fir & ~fir_mask & fir_action0 & fir_action1; /* unmasked FIR bits with both ACTION0 and ACTION1 set */
if (fatal_errors) {
loc = chip_loc_code(flat_chip_id);
if (!loc)
loc = "Not Available";
- prlog(PR_ERR, "NPU: [Loc: %s] P:%d FIR#%d FIR 0x%016llx mask 0x%016llx\n",
- loc, flat_chip_id, i, npu2_fir, npu2_fir_mask);
- prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n",
- loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1);
- total_errors++;
-
- encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i);
+ prlog(PR_ERR, "NPU/PAU: [Loc: %s] P:%d FIR#%d "
+ "FIR 0x%016llx mask 0x%016llx\n",
+ loc, flat_chip_id, i, fir, fir_mask);
+ prlog(PR_ERR, "NPU/PAU: [Loc: %s] P:%d ACTION0 "
+ "0x%016llx, ACTION1 0x%016llx\n",
+ loc, flat_chip_id, fir_action0, fir_action1);
+ if (phb->phb_type != phb_type_pcie_v3) /* no xstop reason is encoded for the pcie_v3 case */
+ encode_npu_xstop_reason(xstop_reason,
+ fatal_errors,
+ i);
+ fir_errors = true;
}
-
- /* Can't do a fence yet, we are just logging fir information for now */
- npu2_fir_addr += NPU2_FIR_OFFSET;
- npu2_fir_mask_addr += NPU2_FIR_OFFSET;
- npu2_fir_action0_addr += NPU2_FIR_OFFSET;
- npu2_fir_action1_addr += NPU2_FIR_OFFSET;
-
}
- if (!total_errors)
- return;
-
- npu2_hmi_verbose = nvram_query_eq_safe("npu2-hmi-verbose", "true");
- /* Force this for now until we sort out something better */
- npu2_hmi_verbose = true;
+ /* dump the unit's SCOM registers when a fatal error was found */
+ if (fir_errors) {
+ switch (phb->phb_type) {
+ case phb_type_npu_v2:
+ case phb_type_npu_v2_opencapi:
+ npu2_dump_scoms(npu2, flat_chip_id);
+ break;
+ case phb_type_pau_opencapi:
+ pau_opencapi_dump_scoms(pau);
+ break;
+ default:
+ break;
+ }
- if (npu2_hmi_verbose) {
- npu2_dump_scoms(flat_chip_id);
prlog(PR_ERR, " _________________________ \n");
- prlog(PR_ERR, "< It's Debug time! >\n");
+ prlog(PR_ERR, "< It's Debug time! >\n");
prlog(PR_ERR, " ------------------------- \n");
- prlog(PR_ERR, " \\ ,__, \n");
- prlog(PR_ERR, " \\ (oo)____ \n");
- prlog(PR_ERR, " (__) )\\ \n");
+ prlog(PR_ERR, " \\ ,__, \n");
+ prlog(PR_ERR, " \\ (oo)____ \n");
+ prlog(PR_ERR, " (__) )\\ \n");
prlog(PR_ERR, " ||--|| * \n");
}
- /* Set up the HMI event */
- hmi_evt->severity = OpalHMI_SEV_WARNING;
- hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
- hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
- hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason);
- hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
-
- /* Marking the event as recoverable so that we don't crash */
- queue_hmi_event(hmi_evt, 1, out_flags);
+ return fir_errors; /* true if any FIR of this PHB reported a fatal error */
}
static void find_npu_checkstop_reason(int flat_chip_id,
@@ -873,67 +880,47 @@ static void find_npu_checkstop_reason(int flat_chip_id,
uint64_t *out_flags)
{
struct phb *phb;
- struct npu *p = NULL;
-
- uint64_t npu_fir;
- uint64_t npu_fir_mask;
- uint64_t npu_fir_action0;
- uint64_t npu_fir_action1;
- uint64_t fatal_errors;
-
- /* Only check for NPU errors if the chip has a NPU */
- if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL)
- return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, out_flags);
-
- /* Find the NPU on the chip associated with the HMI. */
- for_each_phb(phb) {
- /* NOTE: if a chip ever has >1 NPU this will need adjusting */
- if (dt_node_is_compatible(phb->dt_node, "ibm,power8-npu-pciex") &&
- (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
- p = phb_to_npu(phb);
- break;
- }
- }
+ struct dt_node *dn;
+ uint32_t xstop_reason = 0; /* NOTE(review): not reset per PHB, so encoded reasons accumulate across matching PHBs - confirm intended */
- /* If we didn't find a NPU on the chip, it's not our checkstop. */
- if (p == NULL)
+ /* Only check for NPU/PAU errors on P8NVL, P9 and P10 processors */
+ if ((PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL) &&
+ (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9) &&
+ (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P10))
return;
- /* Read all the registers necessary to find a checkstop condition. */
- if (xscom_read(flat_chip_id,
- p->at_xscom + NX_FIR, &npu_fir) ||
- xscom_read(flat_chip_id,
- p->at_xscom + NX_FIR_MASK, &npu_fir_mask) ||
- xscom_read(flat_chip_id,
- p->at_xscom + NX_FIR_ACTION0, &npu_fir_action0) ||
- xscom_read(flat_chip_id,
- p->at_xscom + NX_FIR_ACTION1, &npu_fir_action1)) {
- prerror("Couldn't read NPU registers with XSCOM\n");
- return;
- }
+ /* Visit every NPU/PAU PHB on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ dn = phb->dt_node;
- fatal_errors = npu_fir & ~npu_fir_mask & npu_fir_action0 & npu_fir_action1;
+ if (!(dt_node_is_compatible(dn, "ibm,power8-npu-pciex") ||
+ dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
+ dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex") ||
+ dt_node_is_compatible(dn, "ibm,power10-pau-opencapi-pciex")))
+ continue;
- /* If there's no errors, we don't need to do anything. */
- if (!fatal_errors)
- return;
+ if (dt_get_chip_id(dn) != flat_chip_id)
+ continue;
- prlog(PR_DEBUG, "NPU: FIR 0x%016llx mask 0x%016llx\n",
- npu_fir, npu_fir_mask);
- prlog(PR_DEBUG, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
- npu_fir_action0, npu_fir_action1);
+ /* Check this PHB's FIRs; skip it when no fatal error is reported. */
+ if (!npu_fir_errors(phb, flat_chip_id, &xstop_reason))
+ continue;
- /* Set the NPU to fenced since it can't recover. */
- npu_set_fence_state(p, true);
+ if (phb->phb_type == phb_type_pcie_v3) {
+ /* Set the NPU to fenced since it can't recover. */
+ npu_set_fence_state(phb_to_npu(phb), true);
+ }
- /* Set up the HMI event */
- hmi_evt->severity = OpalHMI_SEV_WARNING;
- hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
- hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
- hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason); /* converted like chip_id below, as the removed NPU2 path did */
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
- /* The HMI is "recoverable" because it shouldn't crash the system */
- queue_hmi_event(hmi_evt, 1, out_flags);
+ /* Marking the event as recoverable so that we don't crash */
+ queue_hmi_event(hmi_evt, 1, out_flags);
+ }
}
static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
@@ -962,7 +949,8 @@ static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags
xscom_write(this_cpu()->chip_id, malf_alert_scom,
~PPC_BIT(i));
find_capp_checkstop_reason(i, hmi_evt, &flags);
- find_nx_checkstop_reason(i, hmi_evt, &flags);
+ if (proc_gen != proc_gen_p10)
+ find_nx_checkstop_reason(i, hmi_evt, &flags);
find_npu_checkstop_reason(i, hmi_evt, &flags);
}
}