aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Frederic Barrat <fbarrat@linux.ibm.com> 2019-10-09 21:38:11 +0200
committer: Vasant Hegde <hegdevasant@linux.vnet.ibm.com> 2020-03-11 11:01:54 +0530
commit: 241b48c685680454768a7c8db4886cb1935f85e9 (patch)
tree: ad435c385db5185dd9ac5014381b0a75a90cb055
parent: 9d37f291983c83b78999e7b47e1b07d6f4f6caac (diff)
download: skiboot-241b48c685680454768a7c8db4886cb1935f85e9.zip
skiboot-241b48c685680454768a7c8db4886cb1935f85e9.tar.gz
skiboot-241b48c685680454768a7c8db4886cb1935f85e9.tar.bz2
npu2-opencapi: Log a warning when resetting a broken device
[ Upstream commit 233e863c8b1dccad8be7c39336d232a4a3994e6b ]

On P9, the NPU doesn't support recovery if the link goes down unexpectedly. It was not fully verified. We mark the device as broken when we receive an error interrupt from the NPU. However, there's nothing to prevent the OS from trying to reset the device; it may or may not work, it's unsupported territory, so let's log a message to make it clear, as it could help when debugging.

We haven't hit any cases where the reset goes badly enough that we'd want to prevent it, so let it go for now. We can revisit later if we have evidence that it's causing more problems than it is worth.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
-rw-r--r-- hw/npu2-opencapi.c 4
1 files changed, 4 insertions, 0 deletions
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 2f66aca..b8ebc83 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1237,6 +1237,10 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
case OCAPI_SLOT_LINK_TRAINED:
otl_enabletx(chip_id, dev->npu->xscom_base, dev);
pci_slot_set_state(slot, OCAPI_SLOT_NORMAL);
+ if (dev->flags & NPU2_DEV_BROKEN) {
+ OCAPIERR(dev, "Resetting a device which hit a previous error. Device recovery is not supported, so future behavior is undefined\n");
+ dev->flags &= ~NPU2_DEV_BROKEN;
+ }
check_perf_counters(dev);
dev->phb_ocapi.scan_map = 1;
return OPAL_SUCCESS;