aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederic Barrat <fbarrat@linux.ibm.com> 2019-05-23 14:17:51 +0200
committer Stewart Smith <stewart@linux.ibm.com> 2019-06-04 13:34:08 +1000
commit ce49505c2fafabc0fa4a4a5d1e34f6dd2dd7fb8f (patch)
tree 85e56ab797d311da2c456f538a16aef7c1cfa030
parent 2d4c7f9886ee45f7c1db933885e5002b76c270ad (diff)
downloadskiboot-ce49505c2fafabc0fa4a4a5d1e34f6dd2dd7fb8f.zip
skiboot-ce49505c2fafabc0fa4a4a5d1e34f6dd2dd7fb8f.tar.gz
skiboot-ce49505c2fafabc0fa4a4a5d1e34f6dd2dd7fb8f.tar.bz2
npu2-opencapi: Mask 2 XSL errors
Commit f8dfd699f584 ("hw/npu2: Setup an error interrupt on some opencapi FIRs") converted the default action of some FIR bits from system checkstop to raising an error interrupt. For 2 XSL error events that can be triggered by a misbehaving AFU, the error interrupt is raised twice, once for each link (the XSL logic in the NPU is shared between 2 links). So a badly behaving AFU could impact another, unsuspecting opencapi adapter. It doesn't look good and it turns out we can do better.

We can mask those 2 XSL errors. The error will also be picked up by the OTL logic, which is per link. So we'll still get an error interrupt, but only on the relevant link, and the other opencapi adapter can stay functional.

Fixes: f8dfd699f584 ("hw/npu2: Setup an error interrupt on some opencapi FIRs")
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
-rw-r--r-- hw/npu2-opencapi.c 29
1 files changed, 20 insertions, 9 deletions
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index d6d8996..4c47150 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1583,26 +1583,37 @@ static void mask_nvlink_fir(struct npu2 *p)
static int enable_interrupts(struct npu2 *p)
{
- uint64_t reg, val_xsl, val_override;
+ uint64_t reg, xsl_fault, xstop_override, xsl_mask;
/*
- * Enable translation interrupts for all bricks and override
- * every brick-fatal error to send an interrupt instead of
- * checkstopping.
+ * We need to:
+ * - enable translation interrupts for all bricks
+ * - override most brick-fatal errors from FIR2 to send an
+ * interrupt instead of the default action of checkstopping
+ * the system, since we can just fence the brick and keep
+ * the system alive.
+ * - the exception to the above is 2 FIRs for XSL errors
+ * resulting from bad AFU behavior, for which we don't want to
+ * checkstop but can't configure to send an error interrupt
+ * either, as the XSL errors are reported on 2 links (the
+ * XSL is shared between 2 links). Instead, we mask
+ * them. The XSL errors will result in an OTL error, which
+ * is reported only once, for the correct link.
*
* FIR bits configured to trigger an interrupt must have their
* default action masked
*/
- val_xsl = PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
- val_override = 0x0FFFEFC00FF1B000;
+ xsl_fault = PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
+ xstop_override = 0x0FFFEFC00F91B000;
+ xsl_mask = PPC_BIT(41) | PPC_BIT(42);
xscom_read(p->chip_id, p->xscom_base + NPU2_MISC_FIR2_MASK, &reg);
- reg |= val_xsl | val_override;
+ reg |= xsl_fault | xstop_override | xsl_mask;
xscom_write(p->chip_id, p->xscom_base + NPU2_MISC_FIR2_MASK, reg);
reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
NPU2_MISC_DA_LEN_8B);
- reg |= val_xsl | val_override;
+ reg |= xsl_fault | xstop_override;
npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
NPU2_MISC_DA_LEN_8B, reg);
@@ -1613,7 +1624,7 @@ static int enable_interrupts(struct npu2 *p)
*/
reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_FENCE_ENABLE2,
NPU2_MISC_DA_LEN_8B);
- reg |= val_override;
+ reg |= xstop_override;
npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_FENCE_ENABLE2,
NPU2_MISC_DA_LEN_8B, reg);