-rw-r--r--   core/init.c                |    1
-rw-r--r--   doc/device-tree/nvlink.rst |   27
-rw-r--r--   doc/nvlink.rst             |   22
-rw-r--r--   hw/Makefile.inc            |    2
-rw-r--r--   hw/npu2.c                  | 1388
-rw-r--r--   include/npu2.h             |  152
-rw-r--r--   include/pci.h              |    1
-rw-r--r--   include/skiboot.h          |    1
8 files changed, 1587 insertions, 7 deletions
diff --git a/core/init.c b/core/init.c
index d9d62ee..6b8137c 100644
--- a/core/init.c
+++ b/core/init.c
@@ -952,6 +952,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 
 	/* Probe NPUs */
 	probe_npu();
+	probe_npu2();
 
 	/* Initialize PCI */
 	pci_init_slots();
diff --git a/doc/device-tree/nvlink.rst b/doc/device-tree/nvlink.rst
index 8375dc4..6ce44e9 100644
--- a/doc/device-tree/nvlink.rst
+++ b/doc/device-tree/nvlink.rst
@@ -66,6 +66,27 @@ NPU bindings:
 	};
 };
 
+GPU memory bindings
+-------------------
+
+.. code-block:: dts
+
+	memory@100000000 {
+		device_type = "memory";
+		compatible = "ibm,coherent-device-memory";
+		linux,usable-memory = <0x0 0x100000000 0x0 0x0>;
+		; denotes a region of unplugged system memory
+		reg = <0x0 0x100000000 0x0 0x80000000>;
+		ibm,associativity = <0x4 0x0 0x0 0x0 0x64>;
+		; numa associativity for the memory once it is hotplugged
+		phandle = <0x10000abc>;
+		linux,phandle = <0x10000abc>;
+	};
+
 Emulated PCI device bindings
 ----------------------------
@@ -83,7 +104,8 @@ Emulated PCI device bindings
 		ibm,pci-config-space-type = <0x1>;
 		vendor-id = <0x1014>;
 		ibm,gpu = <0x100002f7>; /* phandle pointing the associated GPU PCI device node */
-		phandle = <0x100002fc>;
+		memory-region = <0x10000abc>; /* phandle pointing to the GPU memory */
+		phandle = <0x100002fc>;
 	};
 
 	pci@1 {
@@ -94,6 +116,7 @@ Emulated PCI device bindings
 		ibm,pci-config-space-type = <0x1>;
 		vendor-id = <0x1014>;
 		ibm,gpu = <0x100002f5>;
+		memory-region = <0x10000def>;
 		phandle = <0x100002fe>;
 		class-code = <0x60400>;
 		linux,phandle = <0x100002fe>;
@@ -107,6 +130,7 @@ Emulated PCI device bindings
 		ibm,pci-config-space-type = <0x1>;
 		vendor-id = <0x1014>;
 		ibm,gpu = <0x100002f7>;
+		memory-region = <0x10000abc>;
 		phandle = <0x100002fd>;
 		class-code = <0x60400>;
 		linux,phandle = <0x100002fd>;
@@ -120,6 +144,7 @@ Emulated PCI device bindings
 		ibm,pci-config-space-type = <0x1>;
 		vendor-id = <0x1014>;
 		ibm,gpu = <0x100002f5>;
+		memory-region = <0x10000def>;
 		phandle = <0x100002ff>;
 		class-code = <0x60400>;
 		linux,phandle = <0x100002ff>;
diff --git a/doc/nvlink.rst b/doc/nvlink.rst
index d035968..cba64df 100644
--- a/doc/nvlink.rst
+++ b/doc/nvlink.rst
@@ -38,14 +38,15 @@ related to the setup of DMA windows.
 Configuration Space Parameters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-============ =============== =====
-============ =============== =====
-Vendor ID    0x1014 (IBM)
+============ =================== =====
+============ =================== =====
+Vendor ID    0x1014 (IBM)
 Device ID    0x04ea
 Revision ID  0x00
-Class        0x068000 (Bridge Device Other, ProgIf = 0x0)
+Class        0x068000 / 0x068001 (Bridge Device Other, ProgIf = 0x0 / 0x1)
 BAR0/1       TL/DL Registers
-============ =============== =====
+BAR2/3       GEN-ID Registers (Only for rev-id = 0x1)
+============ =================== =====
 
 TL/DL Registers
 ^^^^^^^^^^^^^^^
@@ -59,6 +60,17 @@ to 64-bit BAR#0 of the emulated PCI device configuration space. ::
 
 	| DL (64K)  | BAR#0
 	+-----------+
 
+Generation Registers (GEN-ID)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+On POWER9 each link has 64K of generation ID registers for the relaxed
+ordering mode synchronisation. Refer to the programming guide for
+details of the register layout in this BAR.
+
+Relaxed ordering mode will be disabled by default as it requires
+device driver support. Device drivers will need to request relaxed
+ordering mode through some yet-to-be-designed mechanism.
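+
+As an illustration only (this is not part of the OPAL interface), a
+Linux driver wanting to reach these registers would map 64-bit BAR#2
+of the emulated device with the standard kernel PCI API:
+
+.. code-block:: c
+
+    /* pdev: the emulated NPU PCI function (name is illustrative) */
+    void __iomem *genid = pci_iomap(pdev, 2, 0x10000); /* 64K GEN-ID */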
+ Vendor Specific Capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: diff --git a/hw/Makefile.inc b/hw/Makefile.inc index d87f85e..bcddcf0 100644 --- a/hw/Makefile.inc +++ b/hw/Makefile.inc @@ -6,7 +6,7 @@ HW_OBJS += nx.o nx-rng.o nx-crypto.o nx-842.o HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o -HW_OBJS += fake-nvram.o lpc-mbox.o +HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o HW=hw/built-in.o # FIXME hack this for now diff --git a/hw/npu2.c b/hw/npu2.c new file mode 100644 index 0000000..ada6b38 --- /dev/null +++ b/hw/npu2.c @@ -0,0 +1,1388 @@ +/* Copyright 2013-2016 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <skiboot.h> +#include <io.h> +#include <timebase.h> +#include <pci-cfg.h> +#include <pci.h> +#include <pci-slot.h> +#include <pci-virt.h> +#include <interrupts.h> +#include <opal.h> +#include <opal-api.h> +#include <cpu.h> +#include <device.h> +#include <ccan/str/str.h> +#include <ccan/array_size/array_size.h> +#include <affinity.h> +#include <npu2-regs.h> +#include <npu2.h> +#include <lock.h> +#include <xscom.h> +#include <bitutils.h> +#include <chip.h> + +/* + * NPU2 BAR layout definition. We have 3 stacks and each of them + * contains 2 bricks. So every NPU2 has 6 bricks in total. There are 2 + * PHY BARs and each of them is shared by 3 bricks. Every brick has + * one NTL BAR and two bricks share one GENID BAR. There is also a + * global MMIO BAR. We only expose DL and GENID BARs to the OS and all + * other BARs will be hidden in skiboot. + * + * Before the global MMIO BAR is configured, scom is the only way to + * access the BAR registers. At NPU2 PHB probing time, we rely on scom + * to assign all BARs until the global MMIO BAR is established. + * + * We need to access 4 SM registers in the same stack in order to + * configure one particular BAR. 
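+ *
+ * As an illustration (names as used with NPU2_REG_OFFSET() below),
+ * programming the stack-0 NTL0 BAR means writing the same value to
+ * all four SM copies of the register:
+ *
+ *   NPU2_REG_OFFSET(NPU2_STACK_STCK_0, NPU2_BLOCK_SM_0, NPU2_NTL0_BAR)
+ *   NPU2_REG_OFFSET(NPU2_STACK_STCK_0, NPU2_BLOCK_SM_1, NPU2_NTL0_BAR)
+ *   NPU2_REG_OFFSET(NPU2_STACK_STCK_0, NPU2_BLOCK_SM_2, NPU2_NTL0_BAR)
+ *   NPU2_REG_OFFSET(NPU2_STACK_STCK_0, NPU2_BLOCK_SM_3, NPU2_NTL0_BAR)
+ *
+ * which is exactly the loop npu2_write_bar() below performs.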
+ */ +#define NPU2_DEFINE_BAR(t, n, s) \ + { .flags = 0, \ + .type = t, \ + .reg = NPU2_##n, \ + .stack = s, \ + .base = 0ul, \ + .size = 0ul, \ + } + +#define VENDOR_CAP_START 0x80 +#define VENDOR_CAP_END 0x90 + +#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d + +static struct npu2_bar npu2_bars[] = { + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_GLOBAL, PHY_BAR, NPU2_STACK_STCK_2), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_PHY, PHY_BAR, NPU2_STACK_STCK_0), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_PHY, PHY_BAR, NPU2_STACK_STCK_1), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL0_BAR, NPU2_STACK_STCK_0), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL1_BAR, NPU2_STACK_STCK_0), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL0_BAR, NPU2_STACK_STCK_1), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL1_BAR, NPU2_STACK_STCK_1), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL0_BAR, NPU2_STACK_STCK_2), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_NTL, NTL1_BAR, NPU2_STACK_STCK_2), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_GENID, GENID_BAR, NPU2_STACK_STCK_0), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_GENID, GENID_BAR, NPU2_STACK_STCK_1), + NPU2_DEFINE_BAR(NPU2_BAR_TYPE_GENID, GENID_BAR, NPU2_STACK_STCK_2) +}; + +/* + * We use the indirect method because it uses the same addresses as + * the MMIO offsets (NPU RING) + */ +static void npu2_scom_set_addr(uint64_t gcid, uint64_t scom_base, + uint64_t addr, uint64_t size) +{ + addr = SETFIELD(NPU2_MISC_DA_ADDR, 0ull, addr); + addr = SETFIELD(NPU2_MISC_DA_LEN, addr, size); + xscom_write(gcid, scom_base + NPU2_MISC_SCOM_IND_SCOM_ADDR, addr); +} + +static void npu2_scom_write(uint64_t gcid, uint64_t scom_base, + uint64_t reg, uint64_t size, + uint64_t val) +{ + npu2_scom_set_addr(gcid, scom_base, reg, size); + xscom_write(gcid, scom_base + NPU2_MISC_SCOM_IND_SCOM_DATA, val); +} + +static uint64_t npu2_scom_read(uint64_t gcid, uint64_t scom_base, + uint64_t reg, uint64_t size) +{ + uint64_t val; + + npu2_scom_set_addr(gcid, scom_base, reg, size); + xscom_read(gcid, scom_base + NPU2_MISC_SCOM_IND_SCOM_DATA, &val); + + return val; +} + +void npu2_write_4b(struct npu2 *p, uint64_t reg, uint64_t val) +{ + npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B, val); +} + +uint64_t npu2_read_4b(struct npu2 *p, uint64_t reg) +{ + return npu2_scom_read(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B); +} + +void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val) +{ + npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_8B, val); +} + +uint64_t npu2_read(struct npu2 *p, uint64_t reg) +{ + return npu2_scom_read(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_8B); +} + +void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask) +{ + uint64_t new_val; + + new_val = npu2_read(p, reg); + new_val &= ~mask; + new_val |= val & mask; + npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_8B, new_val); +} + +static inline void npu2_ioda_sel(struct npu2 *p, uint32_t table, + uint32_t index, bool autoinc) +{ + out_be64(p->regs + NPU2_ATS_IODA_TBL, + (autoinc ? 
NPU2_ATS_IODA_TBL_AUTOINC : 0ul) |
+		 SETFIELD(NPU2_ATS_IODA_TBL_SELECT, 0ul, table) |
+		 SETFIELD(NPU2_ATS_IODA_TBL_INDEX, 0ul, index));
+}
+
+static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
+					uint32_t bdfn)
+{
+	struct pci_virt_device *pvd;
+
+	/* All emulated devices are attached to root bus */
+	if (bdfn & ~0xff)
+		return NULL;
+
+	pvd = pci_virt_find_device(&p->phb, bdfn);
+	if (pvd)
+		return pvd->data;
+
+	return NULL;
+}
+
+static void npu2_write_bar(struct npu2 *p,
+			   struct npu2_bar *bar,
+			   uint32_t gcid,
+			   uint32_t scom)
+{
+	uint64_t reg, val, enable = !!(bar->flags & NPU2_BAR_FLAG_ENABLED);
+	int block;
+
+	switch (bar->type) {
+	case NPU2_BAR_TYPE_GLOBAL:
+	case NPU2_BAR_TYPE_PHY:
+		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
+		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, enable);
+		break;
+	case NPU2_BAR_TYPE_NTL:
+		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 17);
+		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, enable);
+		break;
+	case NPU2_BAR_TYPE_GENID:
+		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 17);
+		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, enable);
+		break;
+	default:
+		val = 0ul;
+	}
+
+	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
+		reg = NPU2_REG_OFFSET(bar->stack, block, bar->reg);
+		if (p)
+			npu2_write(p, reg, val);
+		else
+			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
+	}
+}
+
+/* Trap for PCI command (0x4) to enable or disable device's BARs */
+static int64_t npu2_cfg_write_cmd(void *dev,
+				  struct pci_cfg_reg_filter *pcrf __unused,
+				  uint32_t offset, uint32_t size,
+				  uint32_t *data, bool write)
+{
+	struct pci_virt_device *pvd = dev;
+	struct npu2_dev *ndev = pvd->data;
+	struct npu2_bar *ntl_npu_bar, *genid_npu_bar;
+	bool enabled;
+
+	if (!write)
+		return OPAL_PARTIAL;
+
+	if (offset != PCI_CFG_CMD)
+		return OPAL_PARAMETER;
+	if (size != 1 && size != 2 && size != 4)
+		return OPAL_PARAMETER;
+
+	/*
+	 * Enable or disable NTL and GENID BAR. Two bricks share
+	 * one GENID BAR, which is exposed via the first brick.
+	 */
+	enabled = !!(*data & PCI_CFG_CMD_MEM_EN);
+	ntl_npu_bar = ndev->bars[0].npu2_bar;
+	genid_npu_bar = ndev->bars[1].npu2_bar;
+
+	ntl_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED, ntl_npu_bar->flags, enabled);
+	npu2_write_bar(ndev->npu, ntl_npu_bar, 0, 0);
+
+	/*
+	 * Enable/disable the GENID BAR. Two bricks share one GENID
+	 * BAR which is exposed via the first brick so we need to
+	 * track the enables separately.
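+	 *
+	 * Put differently (matching the SETFIELD logic below): brick
+	 * 0 owns NPU2_BAR_FLAG_ENABLED0, brick 1 owns
+	 * NPU2_BAR_FLAG_ENABLED1, and the hardware enable bit is the
+	 * OR of the two, so the shared BAR only goes down once both
+	 * devices have cleared PCI_CFG_CMD_MEM_EN.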
+	 */
+	if (NPU2DEV_BRICK(ndev))
+		genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED1, genid_npu_bar->flags,
+						enabled);
+	else
+		genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED0, genid_npu_bar->flags,
+						enabled);
+
+	/* Enable the BAR if either device requests it enabled, otherwise disable it */
+	genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED, genid_npu_bar->flags,
+					!!(genid_npu_bar->flags & (NPU2_BAR_FLAG_ENABLED0 |
+								   NPU2_BAR_FLAG_ENABLED1)));
+	npu2_write_bar(ndev->npu, genid_npu_bar, 0, 0);
+
+	return OPAL_PARTIAL;
+}
+
+static int64_t npu2_cfg_read_bar(struct npu2_dev *dev __unused,
+				 struct pci_cfg_reg_filter *pcrf,
+				 uint32_t offset, uint32_t size,
+				 uint32_t *data)
+{
+	struct npu2_pcie_bar *bar = (struct npu2_pcie_bar *) pcrf->data;
+
+	if (!(bar->flags & NPU2_PCIE_BAR_FLAG_TRAPPED))
+		return OPAL_PARTIAL;
+
+	if ((size != 4) ||
+	    (offset != pcrf->start && offset != pcrf->start + 4))
+		return OPAL_PARAMETER;
+
+	if (bar->flags & NPU2_PCIE_BAR_FLAG_SIZE_HI)
+		*data = bar->size >> 32;
+	else
+		*data = bar->size;
+	bar->flags &= ~(NPU2_PCIE_BAR_FLAG_TRAPPED | NPU2_PCIE_BAR_FLAG_SIZE_HI);
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_cfg_write_bar(struct npu2_dev *dev,
+				  struct pci_cfg_reg_filter *pcrf,
+				  uint32_t offset, uint32_t size,
+				  uint32_t data)
+{
+	struct pci_virt_device *pvd = dev->pvd;
+	struct npu2_pcie_bar *bar = (struct npu2_pcie_bar *) pcrf->data;
+	uint32_t pci_cmd;
+
+	if ((size != 4) ||
+	    (offset != pcrf->start && offset != pcrf->start + 4))
+		return OPAL_PARAMETER;
+
+	/* Return BAR size on next read */
+	if (data == 0xffffffff) {
+		bar->flags |= NPU2_PCIE_BAR_FLAG_TRAPPED;
+		if (offset == pcrf->start + 4)
+			bar->flags |= NPU2_PCIE_BAR_FLAG_SIZE_HI;
+
+		return OPAL_SUCCESS;
+	}
+
+	if (offset == pcrf->start) {
+		bar->base &= 0xffffffff00000000;
+		bar->base |= (data & 0xfffffff0);
+	} else {
+		bar->base &= 0x00000000ffffffff;
+		bar->base |= ((uint64_t)data << 32);
+
+		PCI_VIRT_CFG_NORMAL_RD(pvd, PCI_CFG_CMD, 4, &pci_cmd);
+
+		if (bar->npu2_bar->type == NPU2_BAR_TYPE_GENID && NPU2DEV_BRICK(dev))
+			bar->base -= 0x10000;
+
+		/* Only allow changing the base address if the BAR is not enabled */
+		if ((bar->npu2_bar->flags & NPU2_BAR_FLAG_ENABLED) &&
+		    (bar->npu2_bar->base != bar->base))
+			return OPAL_HARDWARE;
+
+		bar->npu2_bar->base = bar->base;
+		npu2_write_bar(dev->npu, bar->npu2_bar, 0, 0);
+	}
+
+	/* To update the config cache */
+	return OPAL_PARTIAL;
+}
+
+static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
+				uint32_t offset, uint32_t len, uint32_t *data,
+				bool write)
+{
+	struct pci_virt_device *pvd = dev;
+	struct npu2_dev *ndev = (struct npu2_dev *) pvd->data;
+
+	if (write)
+		return npu2_cfg_write_bar(ndev, pcrf, offset, len, *data);
+
+	return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
+}
+
+#define NPU2_CFG_READ(size, type)					\
+static int64_t npu2_cfg_read##size(struct phb *phb, uint32_t bdfn,	\
+				   uint32_t offset, type *data)		\
+{									\
+	uint32_t val;							\
+	int64_t ret;							\
+									\
+	ret = pci_virt_cfg_read(phb, bdfn, offset,			\
+				sizeof(*data), &val);			\
+	*data = (type)val;						\
+	return ret;							\
+}
+#define NPU2_CFG_WRITE(size, type)					\
+static int64_t npu2_cfg_write##size(struct phb *phb, uint32_t bdfn,	\
+				    uint32_t offset, type data)		\
+{									\
+	uint32_t val = data;						\
+	int64_t ret;							\
+									\
+	ret = pci_virt_cfg_write(phb, bdfn, offset,			\
+				 sizeof(data), val);			\
+	return ret;							\
+}
+
+NPU2_CFG_READ(8, u8);
+NPU2_CFG_READ(16, u16);
+NPU2_CFG_READ(32, u32);
+NPU2_CFG_WRITE(8, u8);
+NPU2_CFG_WRITE(16, u16);
+NPU2_CFG_WRITE(32,
u32); + +static int __npu2_dev_bind_pci_dev(struct phb *phb __unused, + struct pci_device *pd, + void *data) +{ + struct npu2_dev *dev = data; + struct dt_node *pci_dt_node; + char *pcislot; + + /* Ignore non-nvidia PCI devices */ + if ((pd->vdid & 0xffff) != 0x10de) + return 0; + + /* Find the PCI device's slot location */ + for (pci_dt_node = pd->dn; + pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label"); + pci_dt_node = pci_dt_node->parent); + + if (!pci_dt_node) + return 0; + + pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label"); + + prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n", + pcislot, dev->slot_label); + + if (streq(pcislot, dev->slot_label)) + return 1; + + return 0; +} + +static void npu2_dev_bind_pci_dev(struct npu2_dev *dev) +{ + struct phb *phb; + uint32_t i; + + if (dev->pd) + return; + + for (i = 0; i < 64; i++) { + if (dev->npu->phb.opal_id == i) + continue; + + phb = pci_get_phb(i); + if (!phb) + continue; + + dev->pd = pci_walk_dev(phb, NULL, __npu2_dev_bind_pci_dev, dev); + if (dev->pd) { + dev->phb = phb; + /* Found the device, set the bit in config space */ + PCI_VIRT_CFG_INIT_RO(dev->pvd, VENDOR_CAP_START + + VENDOR_CAP_PCI_DEV_OFFSET, 1, 0x01); + return; + } + } + + prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n", + __func__, dev->npu->phb.opal_id, dev->index); +} + +static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED; + +static void npu2_append_phandle(struct dt_node *dn, + u32 phandle) +{ + struct dt_property *prop; + uint32_t *npu_phandles; + size_t len; + + /* + * Use a lock to make sure no one else has a reference to an + * ibm,npu property (this assumes this is the only function + * that holds a reference to it) + */ + lock(&pci_npu_phandle_lock); + + /* This function shouldn't be called unless ibm,npu exists */ + prop = (struct dt_property *)dt_require_property(dn, "ibm,npu", -1); + + /* Need to append to the properties */ + len = prop->len + sizeof(*npu_phandles); + dt_resize_property(&prop, len); + prop->len = len; + + npu_phandles = (uint32_t *)prop->prop; + npu_phandles[len / sizeof(*npu_phandles) - 1] = phandle; + unlock(&pci_npu_phandle_lock); +} + +static int npu2_dn_fixup(struct phb *phb, + struct pci_device *pd, + void *data __unused) +{ + struct npu2 *p = phb_to_npu2(phb); + struct npu2_dev *dev; + + dev = npu2_bdf_to_dev(p, pd->bdfn); + assert(dev); + if (dev->phb || dev->pd) + return 0; + + dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle); + + /* NPU devices require a slot location to associate with GPUs */ + dev->slot_label = dt_prop_get_def(pd->dn, "ibm,slot-label", NULL); + if (!dev->slot_label) { + /** + * @fwts-label NPUNoPHBSlotLabel + * @fwts-advice No GPU/NPU slot information was found. + * NVLink2 functionality will not work. + */ + prlog(PR_ERR, "NPU: Cannot find GPU slot information\n"); + return 0; + } + + /* + * Bind the emulated PCI device with the real one, which can't + * be done until the PCI devices are populated. 
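+	 * (For example: an emulated device whose link carries
+	 * "ibm,slot-label" = "GPU1" is bound to the NVIDIA adapter in
+	 * the physical slot labelled "GPU1"; the label value here is
+	 * illustrative.)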
Once the real PCI device is identified, we also need to fix
+	 * the device-tree for it.
+	 */
+	npu2_dev_bind_pci_dev(dev);
+	if (dev->phb && dev->pd && dev->pd->dn) {
+		if (dt_find_property(dev->pd->dn, "ibm,npu"))
+			npu2_append_phandle(dev->pd->dn, pd->dn->phandle);
+		else
+			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);
+
+		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
+		dev->gpu_bdfn = dev->pd->bdfn;
+	}
+
+	return 0;
+}
+
+static void npu2_phb_final_fixup(struct phb *phb)
+{
+	pci_walk_dev(phb, NULL, npu2_dn_fixup, NULL);
+}
+
+static void npu2_init_ioda_cache(struct npu2 *p)
+{
+	uint64_t val[2];
+	uint32_t i;
+
+	/*
+	 * PE mapping: there are two sets of registers. One of them
+	 * is used to map PEs for transactions. Another set is used
+	 * for error routing. We should have consistent settings in
+	 * both of them. Note that each brick can support at most 3
+	 * PEs. For now, we just support one PE per brick.
+	 */
+	val[0] = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
+	val[0] = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE,
+			  val[0], NPU2_RESERVED_PE_NUM);
+	val[1] = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
+	val[1] = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE,
+			  val[1], NPU2_RESERVED_PE_NUM);
+	for (i = 0; i < ARRAY_SIZE(p->bdf2pe_cache); i++) {
+		if (i < ARRAY_SIZE(p->bdf2pe_cache) / 2)
+			p->bdf2pe_cache[i] = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF,
+						      val[0], i / 3);
+		else
+			p->bdf2pe_cache[i] = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF,
+						      val[1], i / 3);
+
+		if (i % 3)
+			p->bdf2pe_cache[i] = 0ul;
+	}
+
+	/* TVT */
+	memset(p->tve_cache, 0, sizeof(p->tve_cache));
+}
+
+static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
+{
+	struct npu2 *p = phb_to_npu2(phb);
+	uint32_t i;
+
+	if (purge) {
+		NPU2DBG(p, "Purging all IODA tables...\n");
+		npu2_init_ioda_cache(p);
+	}
+
+	/* TVT */
+	npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, 0, true);
+	for (i = 0; i < ARRAY_SIZE(p->tve_cache); i++)
+		out_be64(p->regs + NPU2_ATS_IODA_DATA, p->tve_cache[i]);
+
+	return OPAL_SUCCESS;
+}
+
+static void npu2_hw_init(struct npu2 *p)
+{
+	uint64_t val;
+
+	npu2_ioda_reset(&p->phb, false);
+
+	/* Enable XTS retry mode */
+	val = npu2_read(p, NPU2_XTS_CFG);
+	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_TRY_ATR_RO);
+}
+
+static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
+					   uint64_t pe_num,
+					   uint16_t window_id,
+					   uint64_t pci_start_addr,
+					   uint64_t pci_mem_size)
+{
+	struct npu2 *p = phb_to_npu2(phb);
+	uint64_t end;
+	uint64_t tve;
+
+	/* Sanity check. Each PE has one corresponding TVE */
+	if (pe_num >= NPU2_MAX_PE_NUM ||
+	    window_id != pe_num)
+		return OPAL_PARAMETER;
+
+	if (pci_mem_size) {
+		/* Enable */
+
+		end = pci_start_addr + pci_mem_size;
+
+		/* We have to be 16M aligned */
+		if ((pci_start_addr & 0x00ffffff) ||
+		    (pci_mem_size & 0x00ffffff))
+			return OPAL_PARAMETER;
+
+		/*
+		 * It *looks* like this is the max we can support (we
+		 * need to verify this). Also we are not checking for
+		 * rollover, but then we aren't trying too hard to
+		 * protect ourselves against a completely broken OS.
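+		 * (0x0003ffffffffffffull is 2^50 - 1, i.e. a 50-bit
+		 * PCI address space.)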
+ */ + if (end > 0x0003ffffffffffffull) + return OPAL_PARAMETER; + + /* + * Put start address bits 49:24 into TVE[52:53]||[0:23] + * and end address bits 49:24 into TVE[54:55]||[24:47] + * and set TVE[51] + */ + tve = (pci_start_addr << 16) & (0xffffffull << 40); + tve |= (pci_start_addr >> 38) & (3ull << 10); + tve |= (end >> 8) & (0xfffffful << 16); + tve |= (end >> 40) & (3ull << 8); + tve |= PPC_BIT(51); + } else { + /* Disable */ + tve = 0; + } + + npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false); + out_be64(p->regs + NPU2_ATS_IODA_DATA, tve); + p->tve_cache[window_id] = tve; + + return OPAL_SUCCESS; +} + +static int64_t npu2_map_pe_dma_window(struct phb *phb, + uint64_t pe_num, + uint16_t window_id, + uint16_t tce_levels, + uint64_t tce_table_addr, + uint64_t tce_table_size, + uint64_t tce_page_size) +{ + struct npu2 *p = phb_to_npu2(phb); + uint64_t tts_encoded; + uint64_t data64 = 0; + + /* Sanity check. Each PE has one corresponding TVE */ + if (pe_num >= NPU2_MAX_PE_NUM || + window_id != pe_num) + return OPAL_PARAMETER; + + /* + * Special condition, zero TCE table size used to disable + * the TVE. + */ + if (!tce_table_size) { + npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false); + out_be64(p->regs + NPU2_ATS_IODA_DATA, 0ul); + p->tve_cache[window_id] = 0ul; + return OPAL_SUCCESS; + } + + /* Additional arguments validation */ + if (tce_levels < 1 || + tce_levels > 4 || + !is_pow2(tce_table_size) || + tce_table_size < 0x1000) + return OPAL_PARAMETER; + + /* TCE table size */ + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_TTA, 0ul, tce_table_addr >> 12); + tts_encoded = ilog2(tce_table_size) - 11; + if (tts_encoded > 39) + return OPAL_PARAMETER; + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_SIZE, data64, tts_encoded); + + /* TCE page size */ + switch (tce_page_size) { + case 0x10000: /* 64K */ + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 5); + break; + case 0x1000000: /* 16M */ + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 13); + break; + case 0x10000000: /* 256M */ + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 17); + break; + case 0x1000: /* 4K */ + default: + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 1); + } + + /* Number of levels */ + data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_LEVEL, data64, tce_levels - 1); + + /* Update to hardware */ + npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false); + out_be64(p->regs + NPU2_ATS_IODA_DATA, data64); + p->tve_cache[window_id] = data64; + + return OPAL_SUCCESS; +} + +static int64_t npu2_set_pe(struct phb *phb, + uint64_t pe_num, + uint64_t bdfn, + uint8_t bcompare, + uint8_t dcompare, + uint8_t fcompare, + uint8_t action) +{ + struct npu2 *p = phb_to_npu2(phb); + struct npu2_dev *dev; + uint64_t reg, val; + + /* Sanity check */ + if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE) + return OPAL_PARAMETER; + if (pe_num >= NPU2_MAX_PE_NUM) + return OPAL_PARAMETER; + if (bdfn >> 8) + return OPAL_PARAMETER; + if (bcompare != OpalPciBusAll || + dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER || + fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER) + return OPAL_UNSUPPORTED; + + /* Get the NPU2 device */ + dev = npu2_bdf_to_dev(p, bdfn); + if (!dev) + return OPAL_PARAMETER; + + val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE; + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num); + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->gpu_bdfn); + + if (!NPU2DEV_BRICK(dev)) + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, + NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0); + else + reg = 
NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
+
+	npu2_write(p, reg, val);
+	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
+	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
+	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->gpu_bdfn);
+	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
+			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
+	p->bdf2pe_cache[dev->index] = val;
+	npu2_write(p, reg, val);
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_get_link_state(struct pci_slot *slot __unused, uint8_t *val)
+{
+	/*
+	 * As we're emulating all PCI stuff, the link bandwidth
+	 * isn't a big deal anyway.
+	 */
+	*val = OPAL_SHPC_LINK_UP_x1;
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_get_power_state(struct pci_slot *slot __unused, uint8_t *val)
+{
+	*val = PCI_SLOT_POWER_ON;
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_hreset(struct pci_slot *slot __unused)
+{
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_freset(struct pci_slot *slot __unused)
+{
+	return OPAL_SUCCESS;
+}
+
+static struct pci_slot *npu2_slot_create(struct phb *phb)
+{
+	struct pci_slot *slot;
+
+	slot = pci_slot_alloc(phb, NULL);
+	if (!slot)
+		return slot;
+
+	/* Elementary functions */
+	slot->ops.get_presence_state = NULL;
+	slot->ops.get_link_state = npu2_get_link_state;
+	slot->ops.get_power_state = npu2_get_power_state;
+	slot->ops.get_attention_state = NULL;
+	slot->ops.get_latch_state = NULL;
+	slot->ops.set_power_state = NULL;
+	slot->ops.set_attention_state = NULL;
+
+	slot->ops.prepare_link_change = NULL;
+	slot->ops.poll_link = NULL;
+	slot->ops.hreset = npu2_hreset;
+	slot->ops.freset = npu2_freset;
+	slot->ops.creset = NULL;
+
+	return slot;
+}
+
+static int64_t npu2_freeze_status(struct phb *phb __unused,
+				  uint64_t pe_number __unused,
+				  uint8_t *freeze_state,
+				  uint16_t *pci_error_type __unused,
+				  uint16_t *severity __unused,
+				  uint64_t *phb_status __unused)
+{
+	/*
+	 * FIXME: When it's called by the skiboot PCI config accessor,
+	 * the PE number is fixed to 0, which is incorrect. We need to
+	 * introduce another PHB callback to translate it. For now,
+	 * it keeps the skiboot PCI enumeration going.
+	 */
+	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu2_tce_kill(struct phb *phb, uint32_t kill_type,
+			     uint64_t pe_number, uint32_t tce_size,
+			     uint64_t dma_addr, uint32_t npages)
+{
+	struct npu2 *npu = phb_to_npu2(phb);
+	uint32_t tce_page_size;
+	uint64_t val;
+
+	if (pe_number >= NPU2_MAX_PE_NUM)
+		return OPAL_PARAMETER;
+
+	sync();
+	switch (kill_type) {
+	case OPAL_PCI_TCE_KILL_PAGES:
+		tce_page_size = GETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE,
+					 npu->tve_cache[pe_number]);
+		if (tce_page_size != tce_size) {
+			NPU2ERR(npu, "npu2_tce_kill: Unexpected TCE size (got 0x%x expected 0x%x)\n",
+				tce_size, tce_page_size);
+			return OPAL_PARAMETER;
+		}
+
+		while (npages--) {
+			val = SETFIELD(NPU2_ATS_TCE_KILL_PENUM, dma_addr, pe_number);
+			npu2_write(npu, NPU2_ATS_TCE_KILL, NPU2_ATS_TCE_KILL_ONE | val);
+		}
+		break;
+	case OPAL_PCI_TCE_KILL_PE:
+		/*
+		 * NPU2 doesn't support killing a PE so fall through
+		 * and do a kill all instead.
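+		 * Invalidating everything is heavier than strictly
+		 * necessary, but it is always correct.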
+		 */
+	case OPAL_PCI_TCE_KILL:
+		npu2_write(npu, NPU2_ATS_TCE_KILL, NPU2_ATS_TCE_KILL_ALL);
+		break;
+	default:
+		return OPAL_PARAMETER;
+	}
+
+	return OPAL_SUCCESS;
+}
+
+static const struct phb_ops npu_ops = {
+	.cfg_read8 = npu2_cfg_read8,
+	.cfg_read16 = npu2_cfg_read16,
+	.cfg_read32 = npu2_cfg_read32,
+	.cfg_write8 = npu2_cfg_write8,
+	.cfg_write16 = npu2_cfg_write16,
+	.cfg_write32 = npu2_cfg_write32,
+	.choose_bus = NULL,
+	.device_init = NULL,
+	.phb_final_fixup = npu2_phb_final_fixup,
+	.ioda_reset = npu2_ioda_reset,
+	.papr_errinjct_reset = NULL,
+	.pci_reinit = NULL,
+	.set_phb_mem_window = NULL,
+	.phb_mmio_enable = NULL,
+	.map_pe_mmio_window = NULL,
+	.map_pe_dma_window = npu2_map_pe_dma_window,
+	.map_pe_dma_window_real = npu2_map_pe_dma_window_real,
+	.pci_msi_eoi = NULL,
+	.set_xive_pe = NULL,
+	.get_msi_32 = NULL,
+	.get_msi_64 = NULL,
+	.set_pe = npu2_set_pe,
+	.set_peltv = NULL,
+	.eeh_freeze_status = npu2_freeze_status,
+	.eeh_freeze_clear = NULL,
+	.eeh_freeze_set = NULL,
+	.next_error = NULL,
+	.err_inject = NULL,
+	.get_diag_data = NULL,
+	.get_diag_data2 = NULL,
+	.set_capi_mode = NULL,
+	.set_capp_recovery = NULL,
+	.tce_kill = npu2_tce_kill,
+};
+
+static void assign_mmio_bars(uint32_t gcid,
+			     uint32_t scom)
+{
+	uint64_t mem_start;
+	struct npu2_bar *bar;
+	uint32_t i;
+
+	mem_start = 0x6030200000000;
+	mem_start |= gcid << 21;
+
+	/*
+	 * We're going to assign the BARs in descending size order,
+	 * which is also the order we have in npu2_bars[]. That way
+	 * all BARs are aligned perfectly without wasting resources.
+	 * Also, the Linux kernel won't change anything even though
+	 * it attempts to reassign the BARs it can see, which are the
+	 * NTL and GENID BARs.
+	 *
+	 * GLOBAL MMIO (16MB)
+	 *        PHY0 (2MB)
+	 *        PHY1 (2MB)
+	 *        NTL0 (128KB)
+	 *        NTL1 (128KB)
+	 *        NTL2 (128KB)
+	 *        NTL3 (128KB)
+	 *        NTL4 (128KB)
+	 *        NTL5 (128KB)
+	 *      GENID0 (128KB)
+	 *      GENID1 (128KB)
+	 *      GENID2 (128KB)
+	 */
+	for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
+		bar = &npu2_bars[i];
+		switch (bar->type) {
+		case NPU2_BAR_TYPE_GLOBAL:
+			bar->flags |= NPU2_BAR_FLAG_ENABLED;
+			bar->size = 0x1000000;
+			break;
+		case NPU2_BAR_TYPE_PHY:
+			bar->flags |= NPU2_BAR_FLAG_ENABLED;
+			bar->size = 0x200000;
+			break;
+		case NPU2_BAR_TYPE_NTL:
+			bar->flags &= ~NPU2_BAR_FLAG_ENABLED;
+			bar->size = 0x20000;
+			break;
+		case NPU2_BAR_TYPE_GENID:
+			bar->flags &= ~NPU2_BAR_FLAG_ENABLED;
+			bar->size = 0x20000;
+			break;
+		default:
+			bar->size = 0ul;
+		}
+
+		bar->base = mem_start;
+		mem_start += bar->size;
+		npu2_write_bar(NULL, bar, gcid, scom);
+	}
+}
+
+/*
+ * Probe NPU device node and create PCI root device node
+ * accordingly. The NPU device node should specify the number
+ * of links and the xscom base address used to access them.
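+ *
+ * A rough sketch of the node this code consumes (all property
+ * values below are illustrative, not taken from real hardware):
+ *
+ *   npu@5011000 {
+ *           compatible = "ibm,power9-npu";
+ *           reg = <0x5011000 0x200>;
+ *           ibm,npu-index = <0>;
+ *           ibm,phb-index = <7>;
+ *           ibm,npu-links = <6>;
+ *   };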
+ */ +static void npu2_probe_phb(struct dt_node *dn) +{ + struct dt_node *np; + uint32_t gcid, scom, index, phb_index, links; + uint64_t reg[2], mm_win[2]; + char *path; + + /* Retrieve chip id */ + path = dt_get_path(dn); + gcid = dt_get_chip_id(dn); + index = dt_prop_get_u32(dn, "ibm,npu-index"); + phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); + links = dt_prop_get_u32(dn, "ibm,npu-links"); + prlog(PR_INFO, "Chip %d Found NPU%d (%d links) at %s\n", + gcid, index, links, path); + free(path); + + /* Retrieve scom base address */ + scom = dt_get_address(dn, 0, NULL); + prlog(PR_INFO, " SCOM Base: %08x\n", scom); + + /* Reassign the BARs */ + assign_mmio_bars(gcid, scom); + + /* Global MMIO BAR */ + reg[0] = npu2_bars[0].base; + reg[1] = npu2_bars[0].size; + if (reg[0] && reg[1]) + prlog(PR_INFO, " Global MMIO BAR: %016llx (%lldMB)\n", + reg[0], reg[1] >> 20); + else + prlog(PR_ERR, " Global MMIO BAR: Disabled\n"); + + /* NTL and GENID BARs are exposed to kernel */ + mm_win[0] = npu2_bars[3].base; + mm_win[1] = npu2_bars[ARRAY_SIZE(npu2_bars) - 1].base + + npu2_bars[ARRAY_SIZE(npu2_bars) - 1].size - + mm_win[0]; + + /* Populate PCI root device node */ + np = dt_new_addr(dt_root, "pciex", reg[0]); + assert(np); + dt_add_property_strings(np, + "compatible", + "ibm,power9-npu-pciex", + "ibm,ioda2-npu2-phb"); + dt_add_property_strings(np, "device_type", "pciex"); + dt_add_property(np, "reg", reg, sizeof(reg)); + dt_add_property_cells(np, "ibm,phb-index", phb_index); + dt_add_property_cells(np, "ibm,npu-index", index); + dt_add_property_cells(np, "ibm,chip-id", gcid); + dt_add_property_cells(np, "ibm,xscom-base", scom); + dt_add_property_cells(np, "ibm,npcq", dn->phandle); + dt_add_property_cells(np, "ibm,links", links); + dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win)); +} + +static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev, + uint32_t start, + uint32_t prev_cap) +{ + struct pci_virt_device *pvd = dev->pvd; + uint32_t val; + + /* Add capability list */ + PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start); + PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_EXP); + + /* 0x00 - ID/PCIE capability */ + val = PCI_CFG_CAP_ID_EXP; + val |= ((0x2 << 16) | (PCIE_TYPE_ENDPOINT << 20)); + PCI_VIRT_CFG_INIT_RO(pvd, start, 4, val); + + /* 0x04 - Device capability + * + * We should support FLR. 
Otherwise, it might
+	 * have problems passing it through to userland via the Linux
+	 * VFIO infrastructure.
+	 */
+	val = ((PCIE_MPSS_128) |
+	       (PCIE_PHANTOM_NONE << 3) |
+	       (PCIE_L0SL_MAX_NO_LIMIT << 6) |
+	       (PCIE_L1L_MAX_NO_LIMIT << 9) |
+	       (PCICAP_EXP_DEVCAP_FUNC_RESET));
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_DEVCAP, 4, val);
+
+	/* 0x08 - Device control and status */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DEVCTL, 4, 0x00002810,
+			  0xffff0000, 0x000f0000);
+
+	/* 0x0c - Link capability */
+	val = (PCIE_LSPEED_VECBIT_2 | (PCIE_LWIDTH_1X << 4));
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP, 4, val);
+
+	/* 0x10 - Link control and status */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL, 4, 0x00130000,
+			  0xfffff000, 0xc0000000);
+
+	/* 0x14 - Slot capability */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCAP, 4, 0x00000000);
+
+	/* 0x18 - Slot control and status */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCTL, 4, 0x00000000);
+
+	/* 0x1c - Root control and capability */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RC, 4, 0x00000000,
+			  0xffffffe0, 0x00000000);
+
+	/* 0x20 - Root status */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RSTAT, 4, 0x00000000,
+			  0xffffffff, 0x00010000);
+
+	/* 0x24 - Device capability 2 */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCIECAP_EXP_DCAP2, 4, 0x00000000);
+
+	/* 0x28 - Device Control and status 2 */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DCTL2, 4, 0x00070000,
+			  0xffff0000, 0x00000000);
+
+	/* 0x2c - Link capability 2 */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP2, 4, 0x00000007);
+
+	/* 0x30 - Link control and status 2 */
+	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL2, 4, 0x00000003,
+			  0xffff0000, 0x00200000);
+
+	/* 0x34 - Slot capability 2 */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCAP2, 4, 0x00000000);
+
+	/* 0x38 - Slot control and status 2 */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCTL2, 4, 0x00000000);
+
+	return start + PCICAP_EXP_SCTL2 + 8;
+}
+
+static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
+					 uint32_t start,
+					 uint32_t prev_cap)
+{
+	struct pci_virt_device *pvd = dev->pvd;
+
+#define NPU2_VENDOR_CAP_VERSION	0x00
+#define NPU2_VENDOR_CAP_LEN	0x10
+
+	/* Capability list */
+	PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
+	PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR);
+	dev->vendor_cap = start;
+
+	/* Length and version */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, NPU2_VENDOR_CAP_LEN);
+	PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, NPU2_VENDOR_CAP_VERSION);
+
+	/*
+	 * Defaults when the trap can't handle the read/write (e.g. due
+	 * to reading/writing less than 4 bytes).
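+	 *
+	 * Resulting capability layout, for reference (offsets are
+	 * relative to the capability start):
+	 *
+	 *   +0x00  capability ID (vendor specific)
+	 *   +0x01  next capability pointer
+	 *   +0x02  length (NPU2_VENDOR_CAP_LEN)
+	 *   +0x03  version (NPU2_VENDOR_CAP_VERSION)
+	 *   +0x04/+0x08  trapped registers (the defaults below)
+	 *   +0x0c  link index
+	 *   +0x0d  set to 1 once a real GPU is bound to this device
+	 *          (VENDOR_CAP_PCI_DEV_OFFSET)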
+	 */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + 4, 4, 0);
+	PCI_VIRT_CFG_INIT_RO(pvd, start + 8, 4, 0);
+
+	/* Link index */
+	PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index);
+
+	return start + NPU2_VENDOR_CAP_LEN;
+}
+
+static void npu2_populate_cfg(struct npu2_dev *dev)
+{
+	struct pci_virt_device *pvd = dev->pvd;
+	struct npu2_pcie_bar *bar;
+	uint32_t pos;
+
+	/* 0x00 - Vendor/Device ID */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014);
+
+	/* 0x04 - Command/Status */
+	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8,
+			  0xf9000000);
+
+	pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE,
+			    npu2_cfg_write_cmd, NULL);
+
+	/* 0x08 - Rev/Class/Cache */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800001);
+
+	/* 0x0c - CLS/Latency Timer/Header/BIST */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);
+
+	/* 0x10/14 - BAR#0, NTL BAR */
+	bar = &dev->bars[0];
+	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4,
+			  (bar->base & 0xfffffff0) | (bar->flags & 0xF),
+			  0x0000000f, 0x00000000);
+	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, (bar->base >> 32),
+			  0x00000000, 0x00000000);
+	pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8,
+			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
+			    npu2_dev_cfg_bar, bar);
+
+	/* 0x18/1c - BAR#1, GENID BAR */
+	bar = &dev->bars[1];
+	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 4, (bar->base & 0xfffffff0) |
+			  (bar->flags & 0xF),
+			  0x0000000f, 0x00000000);
+	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR3, 4, (bar->base >> 32), 0x00000000,
+			  0x00000000);
+	pci_virt_add_filter(pvd, PCI_CFG_BAR2, 8,
+			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
+			    npu2_dev_cfg_bar, bar);
+
+	/* 0x20/0x24 - BARs, disabled */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000);
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000);
+
+	/* 0x28 - Cardbus CIS pointer */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000);
+
+	/* 0x2c - Subsystem ID */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000);
+
+	/* 0x30 - ROM BAR, zero sized */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff);
+
+	/* 0x34 - PCI Capability */
+	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000);
+
+	/* 0x38 - Reserved */
+	PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);
+
+	/* 0x3c - INT line/pin/Minimal grant/Maximal latency */
+	if (!NPU2DEV_BRICK(dev))
+		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100);
+	else
+		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000200);
+
+	/* PCIE and vendor specific capability */
+	pos = npu2_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP);
+	npu2_populate_vendor_cap(dev, pos, 0x41);
+	PCI_VIRT_CFG_INIT_RO(pvd, pos + 1, 1, 0);
+}
+
+static uint32_t npu_allocate_bdfn(struct npu2 *p, uint32_t group)
+{
+	int i;
+	int bdfn = (group << 3);
+
+	for (i = 0; i < p->total_devices; i++) {
+		if ((p->devices[i].bdfn & 0xf8) == (bdfn & 0xf8))
+			bdfn++;
+	}
+
+	return bdfn;
+}
+
+static void npu2_populate_devices(struct npu2 *p,
+				  struct dt_node *dn)
+{
+	struct npu2_dev *dev;
+	struct dt_node *npu2_dn, *link;
+	uint32_t npu_phandle, index = 0;
+
+	/*
+	 * Get the NPU node, which has the links that we expand here
+	 * into PCI-like devices attached to our emulated PHB.
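+	 *
+	 * Device/function numbers come from npu_allocate_bdfn()
+	 * above: the GPU group ID selects the device number and each
+	 * further link in the same group takes the next function, so
+	 * (illustratively) two links with group ID 1 show up as
+	 * 0000:00:01.0 and 0000:00:01.1.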
+	 */
+	npu_phandle = dt_prop_get_u32(dn, "ibm,npcq");
+	npu2_dn = dt_find_by_phandle(dt_root, npu_phandle);
+	assert(npu2_dn);
+
+	/* Walk the link@x nodes to initialize devices */
+	p->total_devices = 0;
+	p->phb.scan_map = 0;
+	dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
+		uint32_t group_id;
+
+		dev = &p->devices[index];
+		dev->npu = p;
+		dev->dt_node = link;
+		dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
+
+		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
+		dev->bdfn = npu_allocate_bdfn(p, group_id);
+
+		/* This must be done after calling
+		 * npu_allocate_bdfn() */
+		p->total_devices++;
+		p->phb.scan_map |= 0x1 << ((dev->bdfn & 0xf8) >> 3);
+
+		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
+
+		/* Populate BARs. BAR0/1 is the NTL bar. */
+		dev->bars[0].npu2_bar = &npu2_bars[3 + dev->index];
+		dev->bars[0].base = dev->bars[0].npu2_bar->base;
+		dev->bars[0].size = dev->bars[0].npu2_bar->size;
+		dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
+
+		/* BAR2/3 is the GENID bar. */
+		dev->bars[1].npu2_bar = &npu2_bars[9 + dev->index / 2];
+		dev->bars[1].base = dev->bars[1].npu2_bar->base + (NPU2DEV_BRICK(dev) * 0x10000);
+		dev->bars[1].size = 0x10000;
+		dev->bars[1].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
+
+		/* Initialize PCI virtual device */
+		dev->pvd = pci_virt_add_device(&p->phb, dev->bdfn, 0x100, dev);
+		if (dev->pvd) {
+			p->phb.scan_map |=
+				0x1 << ((dev->pvd->bdfn & 0xf8) >> 3);
+			npu2_populate_cfg(dev);
+		}
+
+		index++;
+	}
+}
+
+static void npu2_add_phb_properties(struct npu2 *p)
+{
+	struct dt_node *np = p->phb.dt_node;
+	uint32_t icsp = get_ics_phandle();
+	uint64_t mm_base, mm_size, mmio_atsd;
+
+	/*
+	 * Add various properties that HB doesn't have to
+	 * add, some of them simply because they result from
+	 * policy decisions made in skiboot rather than in HB
+	 * such as the MMIO windows going to PCI, interrupts,
+	 * etc.
+	 */
+	dt_add_property_cells(np, "#address-cells", 3);
+	dt_add_property_cells(np, "#size-cells", 2);
+	dt_add_property_cells(np, "#interrupt-cells", 1);
+	dt_add_property_cells(np, "bus-range", 0, 0xff);
+	dt_add_property_cells(np, "clock-frequency", 0x200, 0);
+	dt_add_property_cells(np, "interrupt-parent", icsp);
+
+	/* NPU PHB properties */
+	dt_add_property_cells(np, "ibm,opal-num-pes",
+			      NPU2_MAX_PE_NUM);
+	dt_add_property_cells(np, "ibm,opal-reserved-pe",
+			      NPU2_RESERVED_PE_NUM);
+
+	mmio_atsd = (u64) p->regs +
+		NPU2_REG_OFFSET(NPU2_STACK_ATSD, NPU2_BLOCK_ATSD0, NPU2_XTS_MMIO_ATSD_LAUNCH);
+	dt_add_property_cells(np, "ibm,mmio-atsd", hi32(mmio_atsd),
+			      lo32(mmio_atsd));
+
+	/*
+	 * The memory window is exposed as a 64-bit non-prefetchable
+	 * window, because a 64-bit prefetchable one is kind of
+	 * special to the kernel.
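+	 *
+	 * The "ranges" property below is thus a 1:1 (identity)
+	 * mapping of the whole NTL+GENID window using the 0x02000000
+	 * (memory, non-prefetchable) space code.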
+ */ + mm_base = p->mm_base; + mm_size = p->mm_size; + dt_add_property_cells(np, "ranges", 0x02000000, + hi32(mm_base), lo32(mm_base), + hi32(mm_base), lo32(mm_base), + hi32(mm_size), lo32(mm_size)); +} + +static void npu2_create_phb(struct dt_node *dn) +{ + const struct dt_property *prop; + struct npu2 *p; + struct pci_slot *slot; + uint32_t links; + void *pmem; + + /* Retrieve number of devices */ + links = dt_prop_get_u32(dn, "ibm,links"); + pmem = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev)); + assert(pmem); + + /* Populate PHB */ + p = pmem; + p->index = dt_prop_get_u32(dn, "ibm,phb-index"); + p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id"); + p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base"); + p->total_devices = links; + + p->regs = (void *)dt_get_address(dn, 0, NULL); + + prop = dt_require_property(dn, "ibm,mmio-window", -1); + assert(prop->len >= (2 * sizeof(uint64_t))); + p->mm_base = ((const uint64_t *)prop->prop)[0]; + p->mm_size = ((const uint64_t *)prop->prop)[1]; + + p->devices = pmem + sizeof(struct npu2); + + /* Generic PHB */ + p->phb.dt_node = dn; + p->phb.ops = &npu_ops; + p->phb.phb_type = phb_type_npu_v2; + init_lock(&p->lock); + init_lock(&p->phb.lock); + list_head_init(&p->phb.devices); + list_head_init(&p->phb.virt_devices); + + npu2_populate_devices(p, dn); + npu2_add_phb_properties(p); + + slot = npu2_slot_create(&p->phb); + if (!slot) + { + /** + * @fwts-label NPUCannotCreatePHBSlot + * @fwts-advice Firmware probably ran out of memory creating + * NPU slot. NVLink functionality could be broken. + */ + prlog(PR_ERR, "NPU: Cannot create PHB slot\n"); + } + + pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID); + + npu2_init_ioda_cache(p); + npu2_hw_init(p); +} + +void probe_npu2(void) +{ + struct dt_node *np; + + /* Scan NPU XSCOM nodes */ + dt_for_each_compatible(dt_root, np, "ibm,power9-npu") + npu2_probe_phb(np); + + /* Scan newly created PHB nodes */ + dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex") + npu2_create_phb(np); +} diff --git a/include/npu2.h b/include/npu2.h new file mode 100644 index 0000000..ec62ad2 --- /dev/null +++ b/include/npu2.h @@ -0,0 +1,152 @@ +/* Copyright 2013-2016 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NPU2_H +#define __NPU2_H + +/* Debugging options */ +#define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \ + (p)->phb.opal_id, ##a) +#define NPU2INF(p, fmt, a...) prlog(PR_INFO, "NPU%d: " fmt, \ + (p)->phb.opal_id, ##a) +#define NPU2ERR(p, fmt, a...) prlog(PR_ERR, "NPU%d: " fmt, \ + (p)->phb.opal_id, ##a) + +/* Number of PEs supported */ +#define NPU2_MAX_PE_NUM 16 +#define NPU2_RESERVED_PE_NUM 15 + +/* Return the stack (0-2) of a device */ +#define NPU2DEV_STACK(ndev) ((ndev)->index / 2) + +/* Return the brick number (0-1) within a stack */ +#define NPU2DEV_BRICK(ndev) ((ndev)->index % 2) + +/* This represents the state of the actual hardware BARs not the + * emulated PCIe BARs. 
There is a subtle difference between the two, as
+ * not all BARs are exposed outside of skiboot. */
+struct npu2_bar {
+#define NPU2_BAR_FLAG_ENABLED	0x0010
+
+/* Generation IDs are a single space in the hardware but we split
+ * them in two for the emulated PCIe devices so we need to keep track
+ * of which one has been enabled/disabled. */
+#define NPU2_BAR_FLAG_ENABLED0	0x0080
+#define NPU2_BAR_FLAG_ENABLED1	0x0100
+	uint32_t	flags;
+
+#define NPU2_BAR_TYPE_GLOBAL	0
+#define NPU2_BAR_TYPE_PHY	1
+#define NPU2_BAR_TYPE_NTL	2
+#define NPU2_BAR_TYPE_GENID	3
+#define NPU2_BAR_TYPE_MAX	4
+	uint32_t	type;
+	uint64_t	reg;
+	uint64_t	stack;
+	uint64_t	base;
+	uint64_t	size;
+};
+
+/* Represents a BAR that is exposed via the PCIe emulated
+ * devices */
+struct npu2_pcie_bar {
+#define NPU2_PCIE_BAR_FLAG_SIZE_HI	0x0020
+#define NPU2_PCIE_BAR_FLAG_TRAPPED	0x0040
+	uint32_t	flags;
+	struct npu2_bar	*npu2_bar;
+	uint64_t	base;
+	uint64_t	size;
+};
+
+struct npu2;
+struct npu2_dev {
+	uint32_t	index;
+	uint32_t	flags;
+	uint64_t	xscom;
+	void		*regs;
+	struct dt_node	*dt_node;
+	struct npu2_pcie_bar	bars[2];
+	struct npu2	*npu;
+
+	/* Device and function numbers are allocated based on GPU
+	 * association. Links connected to the same GPU will be
+	 * exposed as different functions of the same bus/device. */
+	uint32_t	bdfn;
+	uint32_t	gpu_bdfn;
+
+	/* PCI virtual device and the associated GPU device */
+	struct pci_virt_device	*pvd;
+	struct phb	*phb;
+	struct pci_device	*pd;
+
+	/* Vendor specific capability */
+	uint32_t	vendor_cap;
+
+	/* Which PHY lanes this device is associated with */
+	uint16_t	lane_mask;
+
+	/* Track currently running procedure and step number */
+	uint16_t	procedure_number;
+	uint16_t	procedure_step;
+	uint64_t	procedure_data;
+	unsigned long	procedure_tb;
+	uint32_t	procedure_status;
+
+	/* Used to associate the NPU device with GPU PCI devices */
+	const char	*slot_label;
+};
+
+struct npu2 {
+	uint32_t	index;
+	uint32_t	flags;
+	uint32_t	chip_id;
+	uint64_t	xscom_base;
+	uint64_t	at_xscom;
+	void		*regs;
+	uint64_t	mm_base;
+	uint64_t	mm_size;
+	uint32_t	base_lsi;
+	uint32_t	total_devices;
+	struct npu2_dev	*devices;
+
+	/* IODA cache */
+	uint64_t	lxive_cache[8];
+	uint64_t	bdf2pe_cache[36];
+	uint64_t	tve_cache[16];
+	bool		tx_zcal_complete[2];
+
+	/* Used to protect global MMIO space, in particular the XTS
+	 * tables. */
+	struct lock	lock;
+
+	struct phb	phb;
+};
+
+static inline struct npu2 *phb_to_npu2(struct phb *phb)
+{
+	return container_of(phb, struct npu2, phb);
+}
+
+void npu2_write_4b(struct npu2 *p, uint64_t reg, uint64_t val);
+uint64_t npu2_read_4b(struct npu2 *p, uint64_t reg);
+void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
+uint64_t npu2_read(struct npu2 *p, uint64_t reg);
+void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
+int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
+			   uint32_t offset, uint32_t len, uint32_t *data,
+			   bool write);
+
+#endif /* __NPU2_H */
diff --git a/include/pci.h b/include/pci.h
index 44bedf6..732c1a3 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -328,6 +328,7 @@ enum phb_type {
 	phb_type_pcie_v2,
 	phb_type_pcie_v3,
 	phb_type_pcie_v4,
+	phb_type_npu_v2,
 };
 
 struct phb {
diff --git a/include/skiboot.h b/include/skiboot.h
index c55995b..8bc767a 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -217,6 +217,7 @@ extern void phb3_preload_vpd(void);
 extern int phb4_preload_capp_ucode(void);
 extern void phb4_preload_vpd(void);
 extern void probe_npu(void);
+extern void probe_npu2(void);
 extern void uart_init(void);
 extern void mbox_init(void);
 extern void early_uart_init(void);