aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Kardashevskiy <aik@ozlabs.ru>2019-05-20 14:19:21 +1000
committerVasant Hegde <hegdevasant@linux.vnet.ibm.com>2019-07-01 15:35:26 +0530
commit07dcf5cdbdd8e029c1ce57bf98d392638e7ba5a6 (patch)
treea9b40658c7662a484141d55a33e38ebd9ba4c4b6
parenta2ecdcf552890d238d3f5f0f5da2726de8ad1824 (diff)
downloadskiboot-07dcf5cdbdd8e029c1ce57bf98d392638e7ba5a6.zip
skiboot-07dcf5cdbdd8e029c1ce57bf98d392638e7ba5a6.tar.gz
skiboot-07dcf5cdbdd8e029c1ce57bf98d392638e7ba5a6.tar.bz2
npu2: Reset NVLinks when resetting a GPU
[ Upstream commit 68d11e4460ecaaa7f6253f836d787a1582266074 ] Resetting a V100 GPU brings its NVLinks down and if an NPU tries using those, an HMI occurs. We were lucky not to observe this as the bare metal does not normally reset a GPU and when passed through, GPUs are usually before NPUs in the QEMU command line or Libvirt XML and because of that NPUs are naturally reset first. However, a simple change of the device order brings HMIs. This defines a bus control filter for a PCI slot with a GPU with NVLinks so when the host system issues a secondary bus reset to the slot, it resets associated NVLinks. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Stewart Smith <stewart@linux.ibm.com> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
-rw-r--r--hw/npu2.c55
1 files changed, 55 insertions, 0 deletions
diff --git a/hw/npu2.c b/hw/npu2.c
index 4e57aad..c9c6bd1 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -537,6 +537,48 @@ static int __npu2_dev_bind_pci_dev(struct phb *phb __unused,
return 0;
}
+/*
+ * Config-space write filter for the bridge control register of the
+ * bridge sitting above an NVLink-connected GPU.
+ *
+ * If the written value has the secondary bus reset bit set and the
+ * bridge has a child device, every NPU2 link device bound to that child
+ * is reset via npu2_dev_procedure_reset().
+ *
+ * @dev:    the bridge's struct pci_device (passed as void *)
+ * @pcrf:   the filter descriptor, unused
+ * @offset: config-space offset of the access
+ * @len:    access size in bytes
+ * @data:   value being written
+ * @write:  must be true; the handler asserts on it
+ *
+ * Returns OPAL_PARAMETER for anything other than a 2-byte access at an
+ * even offset, OPAL_PARTIAL in every other case.
+ * NOTE(review): OPAL_PARTIAL presumably lets the caller carry on with
+ * the real register write - confirm against the cfg filter dispatcher.
+ */
+static int64_t npu2_gpu_brigde_sec_bus_reset(void *dev,
+		struct pci_cfg_reg_filter *pcrf __unused,
+		uint32_t offset, uint32_t len,
+		uint32_t *data, bool write)
+{
+	struct pci_device *bridge = dev;
+	struct pci_device *gpu;
+	struct dt_node *node;
+	int idx;
+
+	assert(write);
+
+	/* Short config writes are not supported */
+	if ((len != 2) || (offset & 1)) {
+		PCIERR(bridge->phb, bridge->bdfn,
+		       "Unsupported write to bridge control register\n");
+		return OPAL_PARAMETER;
+	}
+
+	/* Nothing to do unless a child device is being put into reset. */
+	gpu = list_top(&bridge->children, struct pci_device, link);
+	if (!gpu || !(*data & PCI_CFG_BRCTL_SECONDARY_RESET))
+		return OPAL_PARTIAL;
+
+	/* Walk every NPU2 PHB and reset the links bound to this GPU. */
+	dt_for_each_compatible(dt_root, node, "ibm,power9-npu-pciex") {
+		struct phb *npu_phb;
+		struct npu2 *npu;
+
+		npu_phb = pci_get_phb(dt_prop_get_cell(node,
+						       "ibm,opal-phbid", 1));
+		if (!npu_phb || npu_phb->phb_type != phb_type_npu_v2)
+			continue;
+
+		npu = phb_to_npu2_nvlink(npu_phb);
+		for (idx = 0; idx < npu->total_devices; ++idx) {
+			struct npu2_dev *link_dev = &npu->devices[idx];
+
+			if (link_dev->nvlink.pd == gpu)
+				npu2_dev_procedure_reset(link_dev);
+		}
+	}
+
+	return OPAL_PARTIAL;
+}
+
static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
{
struct phb *phb;
@@ -558,6 +600,19 @@ static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
dev->nvlink.phb = phb;
/* Found the device, set the bit in config space */
npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);
+
+ /*
+ * We define a custom sec bus reset handler for a slot
+ * with an NVLink-connected GPU to prevent HMIs which
+ * will otherwise happen if we reset GPU before
+ * resetting NVLinks.
+ */
+ if (dev->nvlink.pd->parent &&
+ dev->nvlink.pd->parent->slot)
+ pci_add_cfg_reg_filter(dev->nvlink.pd->parent,
+ PCI_CFG_BRCTL, 2,
+ PCI_REG_FLAG_WRITE,
+ npu2_gpu_brigde_sec_bus_reset);
return;
}
}