aboutsummaryrefslogtreecommitdiff
path: root/hw/npu2-opencapi.c
diff options
context:
space:
mode:
author Frederic Barrat <fbarrat@linux.ibm.com> 2019-04-05 16:33:01 +0200
committer Stewart Smith <stewart@linux.ibm.com> 2019-04-09 10:50:55 +1000
commit f8dfd699f5844ce7e7934beb5c9a4fe139d22250 (patch)
tree 154a50ff20d01d8f2064ff6030761dcfe8357bf9 /hw/npu2-opencapi.c
parent fa97373f3274de5239124db5c4039a7517d9344c (diff)
download skiboot-f8dfd699f5844ce7e7934beb5c9a4fe139d22250.zip
skiboot-f8dfd699f5844ce7e7934beb5c9a4fe139d22250.tar.gz
skiboot-f8dfd699f5844ce7e7934beb5c9a4fe139d22250.tar.bz2
hw/npu2: Setup an error interrupt on some opencapi FIRs
Many errors reported in the NPU FIR2 register, mostly catching unexpected errors on the opencapi link, are defined as 'brick fatal' in the workbook, yet the default action is set to system checkstop.

It's possible to see those errors during AFU development, where the AFU may send unexpected packets on the link, therefore triggering those errors. Checkstopping the system in this case is clearly extreme, as the error could be contained to the brick and proper analysis of a checkstop is not trivial outside of a bringup environment.

This patch changes the default action of those errors so that the NPU will raise an interrupt instead. Follow-up patches will log proper information so that the error can be debugged and linux can catch the event.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'hw/npu2-opencapi.c')
-rw-r--r-- hw/npu2-opencapi.c 39
1 files changed, 32 insertions, 7 deletions
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index d32aaa5..285615a 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1509,9 +1509,9 @@ static void mask_nvlink_fir(struct npu2 *p)
*/
/* Mask FIRs */
- xscom_read(p->chip_id, p->xscom_base + NPU2_MISC_FIR_MASK1, &reg);
+ xscom_read(p->chip_id, p->xscom_base + NPU2_MISC_FIR1_MASK, &reg);
reg = SETFIELD(PPC_BITMASK(0, 11), reg, 0xFFF);
- xscom_write(p->chip_id, p->xscom_base + NPU2_MISC_FIR_MASK1, reg);
+ xscom_write(p->chip_id, p->xscom_base + NPU2_MISC_FIR1_MASK, reg);
/* freeze disable */
reg = npu2_scom_read(p->chip_id, p->xscom_base,
@@ -1535,17 +1535,42 @@ static void mask_nvlink_fir(struct npu2 *p)
NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
}
-static int enable_xsl_irq(struct npu2 *p)
+static int enable_interrupts(struct npu2 *p)
{
- uint64_t reg;
+ uint64_t reg, val_xsl, val_override;
+
+ /*
+ * Enable translation interrupts for all bricks and override
+ * every brick-fatal error to send an interrupt instead of
+ * checkstopping.
+ *
+ * FIR bits configured to trigger an interrupt must have their
+ * default action masked
+ */
+ val_xsl = PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
+ val_override = 0x0FFFEFC00FF1B000;
+
+ xscom_read(p->chip_id, p->xscom_base + NPU2_MISC_FIR2_MASK, &reg);
+ reg |= val_xsl | val_override;
+ xscom_write(p->chip_id, p->xscom_base + NPU2_MISC_FIR2_MASK, reg);
- /* enable translation interrupts for all bricks */
reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
NPU2_MISC_DA_LEN_8B);
- reg |= PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
+ reg |= val_xsl | val_override;
npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
NPU2_MISC_DA_LEN_8B, reg);
+ /*
+ * Make sure the brick is fenced on those errors.
+ * Fencing is incompatible with freezing, but there's no
+ * freeze defined for FIR2, so we don't have to worry about it
+ */
+ reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_FENCE_ENABLE2,
+ NPU2_MISC_DA_LEN_8B);
+ reg |= val_override;
+ npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_FENCE_ENABLE2,
+ NPU2_MISC_DA_LEN_8B, reg);
+
mask_nvlink_fir(p);
return 0;
}
@@ -1704,7 +1729,7 @@ int npu2_opencapi_init_npu(struct npu2 *npu)
address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
}
- enable_xsl_irq(npu);
+ enable_interrupts(npu);
for (int i = 0; i < npu->total_devices; i++) {
dev = &npu->devices[i];