diff options
Diffstat (limited to 'hw/npu3.c')
-rw-r--r-- | hw/npu3.c | 549 |
1 files changed, 0 insertions, 549 deletions
diff --git a/hw/npu3.c b/hw/npu3.c deleted file mode 100644 index 0346137..0000000 --- a/hw/npu3.c +++ /dev/null @@ -1,549 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later -/* - * Copyright 2019 IBM Corp. - */ - -#include <io.h> -#include <xscom.h> -#include <npu3.h> -#include <npu3-regs.h> -#include <nvram.h> -#include <interrupts.h> -#include <xive.h> - -#define NPU3LOG(l, npu, fmt, a...) \ - prlog(l, "NPU[%d:%d]: " fmt, (npu)->chip_id, (npu)->index, ##a) -#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a) -#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a) -#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a) - -#define NPU3DEVLOG(l, dev, fmt, a...) \ - prlog(l, "NPU[%d:%d:%d]: " fmt, \ - (dev)->npu->chip_id, \ - (dev)->npu->index, \ - (dev)->index, ##a) -#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a) -#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a) -#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a) - -static void npu3_dt_create_link(struct dt_node *npu, uint32_t npu_index, - uint32_t dev_index) -{ - struct dt_node *link; - uint32_t phy_lane_mask, ob_chiplet; - - link = dt_new_addr(npu, "link", dev_index); - - dt_add_property_string(link, "compatible", "ibm,npu-link"); - dt_add_property_cells(link, "reg", dev_index); - dt_add_property_cells(link, "ibm,npu-link-index", dev_index); - - switch (npu_index) { - case 0: - /* fall through */ - case 2: - ob_chiplet = npu_index ? 3 : 0; - - switch (dev_index) { - case 0: - phy_lane_mask = PPC_BITMASK32(0, 3); - break; - case 1: - phy_lane_mask = PPC_BITMASK32(13, 16); - break; - case 2: - phy_lane_mask = PPC_BITMASK32(7, 10); - break; - case 3: - phy_lane_mask = PPC_BITMASK32(20, 23); - break; - } - - break; - case 1: - switch (dev_index) { - case 0: - ob_chiplet = 1; - phy_lane_mask = PPC_BITMASK32(0, 3); - break; - case 1: - ob_chiplet = 2; - phy_lane_mask = PPC_BITMASK32(0, 3); - break; - case 2: - ob_chiplet = 1; - phy_lane_mask = PPC_BITMASK32(7, 10); - break; - case 3: - ob_chiplet = 2; - phy_lane_mask = PPC_BITMASK32(7, 10); - break; - } - - break; - default: - return; - } - - dt_add_property_cells(link, "ibm,npu-phy", ob_chiplet); - dt_add_property_cells(link, "ibm,npu-lane-mask", phy_lane_mask); -} - -static void npu3_dt_create_npu(struct dt_node *xscom, uint32_t npu_index) -{ - const uint32_t npu_base[] = { 0x5011000, 0x5011400, 0x3011c00 }; - struct dt_node *npu; - - npu = dt_new_addr(xscom, "npu", npu_base[npu_index]); - - dt_add_property_cells(npu, "#size-cells", 0); - dt_add_property_cells(npu, "#address-cells", 1); - dt_add_property_cells(npu, "reg", npu_base[npu_index], 0x2c); - dt_add_property_string(npu, "compatible", "ibm,power9-npu3"); - dt_add_property_cells(npu, "ibm,npu-index", npu_index); - - for (uint32_t i = 0; i < NPU3_LINKS_PER_NPU; i++) - npu3_dt_create_link(npu, npu_index, i); -} - -/* This can be removed when/if we decide to use HDAT instead */ -static bool npu3_dt_create(void) -{ - struct proc_chip *chip = next_chip(NULL); - struct dt_node *xscom; - - /* npu3 chips only */ - if (proc_gen < proc_gen_p9 || - chip->type == PROC_CHIP_P9_NIMBUS || - chip->type == PROC_CHIP_P9_CUMULUS) - return false; - - dt_for_each_compatible(dt_root, xscom, "ibm,xscom") - for (uint32_t i = 0; i < 3; i++) - npu3_dt_create_npu(xscom, i); - - return true; -} - -static struct npu3 *npu3_create(struct dt_node *dn) -{ - struct npu3 *npu; - struct dt_node *link; - struct npu3_dev *dev; - char *path; - uint32_t i; - - npu = zalloc(sizeof(*npu)); - assert(npu); - - init_lock(&npu->lock); - - npu->dt_node = dn; - npu->index = dt_prop_get_u32(dn, "ibm,npu-index"); - npu->xscom_base = dt_get_address(dn, 0, NULL); - - npu->chip_id = dt_get_chip_id(dn); - assert(get_chip(npu->chip_id)); - - dt_for_each_compatible(dn, link, "ibm,npu-link") { - i = dt_prop_get_u32(link, "ibm,npu-link-index"); - assert(i < NPU3_LINKS_PER_NPU); - - dev = &npu->devices[i]; - dev->index = i; - dev->npu = npu; - dev->dn = link; - dev->ob_chiplet = dt_prop_get_u32(link, "ibm,npu-phy"); - dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask"); - dev->proc.status = NPU3_PROC_COMPLETE; - }; - - path = dt_get_path(dn); - NPU3INF(npu, "Found %s\n", path); - NPU3INF(npu, "SCOM base: 0x%llx\n", npu->xscom_base); - free(path); - - return npu; -} - -struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev, - enum npu3_dev_type type) -{ - uint32_t i = 0; - - if (dev) - i = dev->index + 1; - - for (; i < NPU3_LINKS_PER_NPU; i++) { - dev = &npu->devices[i]; - - if (dev->type == type || type == NPU3_DEV_TYPE_ANY) - return dev; - } - - return NULL; -} - -static void npu3_device_detect_fixup(struct npu3_dev *dev) -{ - struct dt_node *dn = dev->dn; - - if (dev->type == NPU3_DEV_TYPE_NVLINK) { - dt_add_property_strings(dn, "ibm,npu-link-type", "nvlink"); - dev->link_speed = dt_prop_get_u32_def( - dn, "nvidia,link-speed", 0xff); - return; - } - - NPU3DEVDBG(dev, "Link type unknown\n"); - dt_add_property_strings(dn, "ibm,npu-link-type", "unknown"); -} - -/* - * We use the indirect method because it uses the same addresses as - * the MMIO offsets (NPU RING) - */ -static void npu3_scom_sel(struct npu3 *npu, uint64_t reg, uint64_t size) -{ - uint64_t val; - - val = SETFIELD(NPU3_MISC_DA_ADDR, 0ull, reg); - val = SETFIELD(NPU3_MISC_DA_LEN, val, size); - xscom_write(npu->chip_id, - npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_ADDR, - val); -} - -static void npu3_scom_write(struct npu3 *npu, uint64_t reg, uint64_t size, - uint64_t val) -{ - npu3_scom_sel(npu, reg, size); - xscom_write(npu->chip_id, - npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA, - val); -} - -static uint64_t npu3_scom_read(struct npu3 *npu, uint64_t reg, uint64_t size) -{ - uint64_t val; - - npu3_scom_sel(npu, reg, size); - xscom_read(npu->chip_id, - npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA, - &val); - - return val; -} - -void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val) -{ - void *mmio = (void *)npu->regs[0]; - - if (mmio) - out_be64(mmio + reg, val); - else - npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_8B, val); - - /* CQ_SM writes should be mirrored in all four blocks */ - if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0)) - return; - - for (uint32_t i = 1; i < 4; i++) - npu3_write(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg), - val); -} - -uint64_t npu3_read(struct npu3 *npu, uint64_t reg) -{ - void *mmio = (void *)npu->regs[0]; - - if (mmio) - return in_be64(mmio + reg); - - return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_8B); -} - -void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val) -{ - void *mmio = (void *)npu->regs[0]; - - if (mmio) - out_be32(mmio + reg, val); - else - npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_4B, - (uint64_t)val << 32); - - if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0)) - return; - - for (uint32_t i = 1; i < 4; i++) - npu3_write_4b(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg), - val); -} - -uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg) -{ - void *mmio = (void *)npu->regs[0]; - - if (mmio) - return in_be32(mmio + reg); - - return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_4B) >> 32; -} - -static void npu3_misc_config(struct npu3 *npu) -{ - struct npu3_dev *dev; - uint32_t typemap = 0; - uint64_t reg, val; - - npu3_for_each_nvlink_dev(dev, npu) - typemap |= 0x10 >> dev->index; - - reg = NPU3_MCP_MISC_CFG0; - val = npu3_read(npu, reg); - val |= NPU3_MCP_MISC_CFG0_ENABLE_PBUS; - val &= ~NPU3_MCP_MISC_CFG0_ENABLE_SNARF_CPM; - val = SETFIELD(NPU3_MCP_MISC_CFG0_NVLINK_MODE, val, typemap); - val = SETFIELD(NPU3_MCP_MISC_CFG0_OCAPI_MODE, val, ~typemap); - npu3_write(npu, reg, val); - - reg = NPU3_SNP_MISC_CFG0; - val = npu3_read(npu, reg); - val |= NPU3_SNP_MISC_CFG0_ENABLE_PBUS; - val = SETFIELD(NPU3_SNP_MISC_CFG0_NVLINK_MODE, val, typemap); - val = SETFIELD(NPU3_SNP_MISC_CFG0_OCAPI_MODE, val, ~typemap); - npu3_write(npu, reg, val); - - reg = NPU3_CTL_MISC_CFG2; - val = npu3_read(npu, reg); - val = SETFIELD(NPU3_CTL_MISC_CFG2_NVLINK_MODE, val, typemap); - val = SETFIELD(NPU3_CTL_MISC_CFG2_OCAPI_MODE, val, ~typemap); - npu3_write(npu, reg, val); - - reg = NPU3_DAT_MISC_CFG1; - val = npu3_read(npu, reg); - val = SETFIELD(NPU3_DAT_MISC_CFG1_NVLINK_MODE, val, typemap); - val = SETFIELD(NPU3_DAT_MISC_CFG1_OCAPI_MODE, val, ~typemap); - npu3_write(npu, reg, val); -} - -static void npu3_assign_bars(struct npu3 *npu) -{ - struct npu3_dev *dev; - uint64_t addr, size, val; - - /* Global MMIO bar (per npu) */ - phys_map_get(npu->chip_id, NPU_REGS, npu->index, &addr, &size); - val = SETFIELD(NPU3_MMIO_BAR_ADDR, 0ull, addr >> 24); - val |= NPU3_MMIO_BAR_ENABLE; - npu3_write(npu, NPU3_MMIO_BAR, val); - - NPU3INF(npu, "MMIO base: 0x%016llx (%lldMB)\n", addr, size >> 20); - npu->regs[0] = addr; - npu->regs[1] = size; - - /* NTL bar (per device) */ - npu3_for_each_dev(dev, npu) { - phys_map_get(npu->chip_id, NPU_NTL, npu3_chip_dev_index(dev), - &addr, &size); - val = SETFIELD(NPU3_NTL_BAR_ADDR, 0ull, addr >> 16); - val = SETFIELD(NPU3_NTL_BAR_SIZE, val, ilog2(size >> 16)); - npu3_write(npu, NPU3_NTL_BAR(dev->index), val); - - dev->ntl_bar.addr = addr; - dev->ntl_bar.size = size; - } - - /* GENID bar (logically divided per device) */ - phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, NULL); - val = SETFIELD(NPU3_GENID_BAR_ADDR, 0ull, addr >> 19); - npu3_write(npu, NPU3_GENID_BAR, val); - - npu3_for_each_dev(dev, npu) { - dev->genid_bar.addr = addr + (dev->index << 16); - dev->genid_bar.size = 64 << 10; - } -} - -void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable) -{ - struct npu3 *npu = dev->npu; - uint64_t reg, val; - - if (dev->ntl_bar.enable == enable) /* No state change */ - return; - - dev->ntl_bar.enable = enable; - dev->genid_bar.enable = enable; - - reg = NPU3_NTL_BAR(dev->index); - val = npu3_read(npu, reg); - val = SETFIELD(NPU3_NTL_BAR_ENABLE, val, enable); - npu3_write(npu, reg, val); - - /* - * Generation IDs are a single space in the hardware but we split them - * per device. Only disable in hardware if every device has disabled. - */ - if (!enable) - npu3_for_each_dev(dev, npu) - if (dev->genid_bar.enable) - return; - - reg = NPU3_GENID_BAR; - val = npu3_read(npu, reg); - val = SETFIELD(NPU3_GENID_BAR_ENABLE, val, enable); - npu3_write(npu, reg, val); -} - -static uint64_t npu3_ipi_attributes(struct irq_source *is, uint32_t isn) -{ - struct npu3 *npu = is->data; - uint32_t level = isn - npu->irq_base; - - /* TCE interrupt is used to detect a frozen PE */ - if (level == 18) - return IRQ_ATTR_TARGET_OPAL | - IRQ_ATTR_TARGET_RARE | - IRQ_ATTR_TYPE_MSI; - - return IRQ_ATTR_TARGET_LINUX; -} - -static void npu3_ipi_interrupt(struct irq_source *is, uint32_t isn) -{ - struct npu3 *npu = is->data; - uint32_t level = isn - npu->irq_base; - - if (level != 18) { - NPU3ERR(npu, "Received unknown interrupt %d\n", level); - return; - } - - opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR); -} - -#define NPU3_IRQ_LEVELS 60 - -static char *npu3_ipi_name(struct irq_source *is, uint32_t isn) -{ - struct npu3 *npu = is->data; - uint32_t level = isn - npu->irq_base; - static const char *names[NPU3_IRQ_LEVELS] = { - [0] = "NDL 0 Stall Event (brick 0)", - [1] = "NDL 0 No-Stall Event (brick 0)", - [2] = "NDL 1 Stall Event (brick 1)", - [3] = "NDL 1 No-Stall Event (brick 1)", - [4] = "NDL 2 Stall Event (brick 2)", - [5] = "NDL 2 No-Stall Event (brick 2)", - [6] = "NDL 3 Stall Event (brick 3)", - [7] = "NDL 3 No-Stall Event (brick 3)", - [8] = "NDL 4 Stall Event (brick 4)", - [9] = "NDL 4 No-Stall Event (brick 4)", - [10] = "NDL 5 Stall Event (brick 5)", - [11] = "NDL 5 No-Stall Event (brick 5)", - [12] = "NTL 0 Event", - [13] = "NTL 1 Event", - [14] = "NTL 2 Event", - [15] = "NTL 3 Event", - [16] = "NTL 4 Event", - [17] = "NTL 5 Event", - [18] = "TCE Event", - [19] = "ATS Event", - [20] = "CQ Event", - [21] = "MISC Event", - [41] = "Memory Controller Event", - [42] = "NDL 6 Stall Event (brick 6)", - [43] = "NDL 6 No-Stall Event (brick 6)", - [44] = "NDL 7 Stall Event (brick 7)", - [45] = "NDL 7 No-Stall Event (brick 7)", - [46] = "NDL 8 Stall Event (brick 8)", - [47] = "NDL 8 No-Stall Event (brick 8)", - [48] = "NDL 9 Stall Event (brick 9)", - [49] = "NDL 9 No-Stall Event (brick 9)", - [50] = "NDL 10 Stall Event (brick 10)", - [51] = "NDL 10 No-Stall Event (brick 10)", - [52] = "NDL 11 Stall Event (brick 11)", - [53] = "NDL 11 No-Stall Event (brick 11)", - [54] = "NTL 6 Event", - [55] = "NTL 7 Event", - [56] = "NTL 8 Event", - [57] = "NTL 9 Event", - [58] = "NTL 10 Event", - [59] = "NTL 11 Event", - }; - - if (level >= NPU3_IRQ_LEVELS || !names[level]) - return strdup("Unknown"); - - return strdup(names[level]); -} - -static const struct irq_source_ops npu3_ipi_ops = { - .attributes = npu3_ipi_attributes, - .interrupt = npu3_ipi_interrupt, - .name = npu3_ipi_name, -}; - -static void npu3_setup_irqs(struct npu3 *npu) -{ - uint64_t reg, val; - uint32_t base; - - base = xive_alloc_ipi_irqs(npu->chip_id, NPU3_IRQ_LEVELS, 64); - if (base == XIVE_IRQ_ERROR) { - NPU3ERR(npu, "Failed to allocate interrupt sources\n"); - return; - } - - xive_register_ipi_source(base, NPU3_IRQ_LEVELS, npu, &npu3_ipi_ops); - - /* Set IPI configuration */ - reg = NPU3_MISC_CFG; - val = npu3_read(npu, reg); - val = SETFIELD(NPU3_MISC_CFG_IPI_PS, val, NPU3_MISC_CFG_IPI_PS_64K); - val = SETFIELD(NPU3_MISC_CFG_IPI_OS, val, NPU3_MISC_CFG_IPI_OS_AIX); - npu3_write(npu, reg, val); - - /* Set IRQ base */ - reg = NPU3_MISC_INT_BAR; - val = SETFIELD(NPU3_MISC_INT_BAR_ADDR, 0ull, - (uint64_t)xive_get_trigger_port(base) >> 12); - npu3_write(npu, reg, val); - - npu->irq_base = base; -} - -static void npu3_init(struct npu3 *npu) -{ - struct npu3_dev *dev; - - platform.npu3_device_detect(npu); - npu3_for_each_dev(dev, npu) - npu3_device_detect_fixup(dev); - - npu3_misc_config(npu); - npu3_assign_bars(npu); - npu3_setup_irqs(npu); - npu3_init_nvlink(npu); -} - -void probe_npu3(void) -{ - struct dt_node *dn; - struct npu3 *npu; - - if (!npu3_dt_create()) - return; - - if (!platform.npu3_device_detect) { - prlog(PR_INFO, "NPU: Platform does not support NPU\n"); - return; - } - - dt_for_each_compatible(dt_root, dn, "ibm,power9-npu3") { - npu = npu3_create(dn); - npu3_init(npu); - } -} |