aboutsummaryrefslogtreecommitdiff
path: root/hw/npu3.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/npu3.c')
-rw-r--r-- hw/npu3.c | 549
1 files changed, 0 insertions, 549 deletions
diff --git a/hw/npu3.c b/hw/npu3.c
deleted file mode 100644
index 0346137..0000000
--- a/hw/npu3.c
+++ /dev/null
@@ -1,549 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-/*
- * Copyright 2019 IBM Corp.
- */
-
-#include <io.h>
-#include <xscom.h>
-#include <npu3.h>
-#include <npu3-regs.h>
-#include <nvram.h>
-#include <interrupts.h>
-#include <xive.h>
-
/*
 * Per-NPU logging helpers. Each message is prefixed with the chip id and
 * the index read from the npu argument.
 */
#define NPU3LOG(lvl, npu, fmt, args...) \
	prlog(lvl, "NPU[%d:%d]: " fmt, (npu)->chip_id, (npu)->index, ##args)
#define NPU3DBG(npu, fmt, args...)	NPU3LOG(PR_DEBUG, npu, fmt, ##args)
#define NPU3INF(npu, fmt, args...)	NPU3LOG(PR_INFO, npu, fmt, ##args)
#define NPU3ERR(npu, fmt, args...)	NPU3LOG(PR_ERR, npu, fmt, ##args)
-
/*
 * Per-device logging helpers. Each message is prefixed with the chip id
 * and index of the device's parent NPU, followed by the device index.
 */
#define NPU3DEVLOG(lvl, dev, fmt, args...)	\
	prlog(lvl, "NPU[%d:%d:%d]: " fmt,	\
	      (dev)->npu->chip_id,		\
	      (dev)->npu->index,		\
	      (dev)->index, ##args)
#define NPU3DEVDBG(dev, fmt, args...)	NPU3DEVLOG(PR_DEBUG, dev, fmt, ##args)
#define NPU3DEVINF(dev, fmt, args...)	NPU3DEVLOG(PR_INFO, dev, fmt, ##args)
#define NPU3DEVERR(dev, fmt, args...)	NPU3DEVLOG(PR_ERR, dev, fmt, ##args)
-
/*
 * Create the "link" child node for one link of an NPU and record which
 * PHY (OB chiplet) and which lanes it uses.
 *
 * npu:       device-tree node of the parent NPU
 * npu_index: index of the NPU (0, 1 or 2 have known wiring)
 * dev_index: index of the link within the NPU (0 to 3 have known wiring)
 *
 * The node always gets its "compatible", "reg" and "ibm,npu-link-index"
 * properties. "ibm,npu-phy" and "ibm,npu-lane-mask" are only added when
 * both indices have a known wiring; otherwise the function returns early
 * rather than publishing uninitialized values.
 */
static void npu3_dt_create_link(struct dt_node *npu, uint32_t npu_index,
				uint32_t dev_index)
{
	struct dt_node *link;
	uint32_t phy_lane_mask, ob_chiplet;

	link = dt_new_addr(npu, "link", dev_index);

	dt_add_property_string(link, "compatible", "ibm,npu-link");
	dt_add_property_cells(link, "reg", dev_index);
	dt_add_property_cells(link, "ibm,npu-link-index", dev_index);

	switch (npu_index) {
	case 0:
		/* fall through */
	case 2:
		/* NPU 0 and NPU 2 each use a single chiplet for all links */
		ob_chiplet = npu_index ? 3 : 0;

		switch (dev_index) {
		case 0:
			phy_lane_mask = PPC_BITMASK32(0, 3);
			break;
		case 1:
			phy_lane_mask = PPC_BITMASK32(13, 16);
			break;
		case 2:
			phy_lane_mask = PPC_BITMASK32(7, 10);
			break;
		case 3:
			phy_lane_mask = PPC_BITMASK32(20, 23);
			break;
		default:
			/* Unknown link: leave the PHY properties out */
			return;
		}

		break;
	case 1:
		/* NPU 1 alternates its links between chiplets 1 and 2 */
		switch (dev_index) {
		case 0:
			ob_chiplet = 1;
			phy_lane_mask = PPC_BITMASK32(0, 3);
			break;
		case 1:
			ob_chiplet = 2;
			phy_lane_mask = PPC_BITMASK32(0, 3);
			break;
		case 2:
			ob_chiplet = 1;
			phy_lane_mask = PPC_BITMASK32(7, 10);
			break;
		case 3:
			ob_chiplet = 2;
			phy_lane_mask = PPC_BITMASK32(7, 10);
			break;
		default:
			/* Unknown link: leave the PHY properties out */
			return;
		}

		break;
	default:
		return;
	}

	dt_add_property_cells(link, "ibm,npu-phy", ob_chiplet);
	dt_add_property_cells(link, "ibm,npu-lane-mask", phy_lane_mask);
}
-
-static void npu3_dt_create_npu(struct dt_node *xscom, uint32_t npu_index)
-{
- const uint32_t npu_base[] = { 0x5011000, 0x5011400, 0x3011c00 };
- struct dt_node *npu;
-
- npu = dt_new_addr(xscom, "npu", npu_base[npu_index]);
-
- dt_add_property_cells(npu, "#size-cells", 0);
- dt_add_property_cells(npu, "#address-cells", 1);
- dt_add_property_cells(npu, "reg", npu_base[npu_index], 0x2c);
- dt_add_property_string(npu, "compatible", "ibm,power9-npu3");
- dt_add_property_cells(npu, "ibm,npu-index", npu_index);
-
- for (uint32_t i = 0; i < NPU3_LINKS_PER_NPU; i++)
- npu3_dt_create_link(npu, npu_index, i);
-}
-
-/* This can be removed when/if we decide to use HDAT instead */
-static bool npu3_dt_create(void)
-{
- struct proc_chip *chip = next_chip(NULL);
- struct dt_node *xscom;
-
- /* npu3 chips only */
- if (proc_gen < proc_gen_p9 ||
- chip->type == PROC_CHIP_P9_NIMBUS ||
- chip->type == PROC_CHIP_P9_CUMULUS)
- return false;
-
- dt_for_each_compatible(dt_root, xscom, "ibm,xscom")
- for (uint32_t i = 0; i < 3; i++)
- npu3_dt_create_npu(xscom, i);
-
- return true;
-}
-
-static struct npu3 *npu3_create(struct dt_node *dn)
-{
- struct npu3 *npu;
- struct dt_node *link;
- struct npu3_dev *dev;
- char *path;
- uint32_t i;
-
- npu = zalloc(sizeof(*npu));
- assert(npu);
-
- init_lock(&npu->lock);
-
- npu->dt_node = dn;
- npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
- npu->xscom_base = dt_get_address(dn, 0, NULL);
-
- npu->chip_id = dt_get_chip_id(dn);
- assert(get_chip(npu->chip_id));
-
- dt_for_each_compatible(dn, link, "ibm,npu-link") {
- i = dt_prop_get_u32(link, "ibm,npu-link-index");
- assert(i < NPU3_LINKS_PER_NPU);
-
- dev = &npu->devices[i];
- dev->index = i;
- dev->npu = npu;
- dev->dn = link;
- dev->ob_chiplet = dt_prop_get_u32(link, "ibm,npu-phy");
- dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
- dev->proc.status = NPU3_PROC_COMPLETE;
- };
-
- path = dt_get_path(dn);
- NPU3INF(npu, "Found %s\n", path);
- NPU3INF(npu, "SCOM base: 0x%llx\n", npu->xscom_base);
- free(path);
-
- return npu;
-}
-
-struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev,
- enum npu3_dev_type type)
-{
- uint32_t i = 0;
-
- if (dev)
- i = dev->index + 1;
-
- for (; i < NPU3_LINKS_PER_NPU; i++) {
- dev = &npu->devices[i];
-
- if (dev->type == type || type == NPU3_DEV_TYPE_ANY)
- return dev;
- }
-
- return NULL;
-}
-
-static void npu3_device_detect_fixup(struct npu3_dev *dev)
-{
- struct dt_node *dn = dev->dn;
-
- if (dev->type == NPU3_DEV_TYPE_NVLINK) {
- dt_add_property_strings(dn, "ibm,npu-link-type", "nvlink");
- dev->link_speed = dt_prop_get_u32_def(
- dn, "nvidia,link-speed", 0xff);
- return;
- }
-
- NPU3DEVDBG(dev, "Link type unknown\n");
- dt_add_property_strings(dn, "ibm,npu-link-type", "unknown");
-}
-
-/*
- * We use the indirect method because it uses the same addresses as
- * the MMIO offsets (NPU RING)
- */
-static void npu3_scom_sel(struct npu3 *npu, uint64_t reg, uint64_t size)
-{
- uint64_t val;
-
- val = SETFIELD(NPU3_MISC_DA_ADDR, 0ull, reg);
- val = SETFIELD(NPU3_MISC_DA_LEN, val, size);
- xscom_write(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_ADDR,
- val);
-}
-
-static void npu3_scom_write(struct npu3 *npu, uint64_t reg, uint64_t size,
- uint64_t val)
-{
- npu3_scom_sel(npu, reg, size);
- xscom_write(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
- val);
-}
-
-static uint64_t npu3_scom_read(struct npu3 *npu, uint64_t reg, uint64_t size)
-{
- uint64_t val;
-
- npu3_scom_sel(npu, reg, size);
- xscom_read(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
- &val);
-
- return val;
-}
-
-void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- out_be64(mmio + reg, val);
- else
- npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_8B, val);
-
- /* CQ_SM writes should be mirrored in all four blocks */
- if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
- return;
-
- for (uint32_t i = 1; i < 4; i++)
- npu3_write(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
- val);
-}
-
-uint64_t npu3_read(struct npu3 *npu, uint64_t reg)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- return in_be64(mmio + reg);
-
- return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_8B);
-}
-
-void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- out_be32(mmio + reg, val);
- else
- npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_4B,
- (uint64_t)val << 32);
-
- if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
- return;
-
- for (uint32_t i = 1; i < 4; i++)
- npu3_write_4b(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
- val);
-}
-
-uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- return in_be32(mmio + reg);
-
- return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_4B) >> 32;
-}
-
-static void npu3_misc_config(struct npu3 *npu)
-{
- struct npu3_dev *dev;
- uint32_t typemap = 0;
- uint64_t reg, val;
-
- npu3_for_each_nvlink_dev(dev, npu)
- typemap |= 0x10 >> dev->index;
-
- reg = NPU3_MCP_MISC_CFG0;
- val = npu3_read(npu, reg);
- val |= NPU3_MCP_MISC_CFG0_ENABLE_PBUS;
- val &= ~NPU3_MCP_MISC_CFG0_ENABLE_SNARF_CPM;
- val = SETFIELD(NPU3_MCP_MISC_CFG0_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_MCP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_SNP_MISC_CFG0;
- val = npu3_read(npu, reg);
- val |= NPU3_SNP_MISC_CFG0_ENABLE_PBUS;
- val = SETFIELD(NPU3_SNP_MISC_CFG0_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_SNP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_CTL_MISC_CFG2;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_CTL_MISC_CFG2_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_CTL_MISC_CFG2_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_DAT_MISC_CFG1;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_DAT_MISC_CFG1_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_DAT_MISC_CFG1_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-}
-
-static void npu3_assign_bars(struct npu3 *npu)
-{
- struct npu3_dev *dev;
- uint64_t addr, size, val;
-
- /* Global MMIO bar (per npu) */
- phys_map_get(npu->chip_id, NPU_REGS, npu->index, &addr, &size);
- val = SETFIELD(NPU3_MMIO_BAR_ADDR, 0ull, addr >> 24);
- val |= NPU3_MMIO_BAR_ENABLE;
- npu3_write(npu, NPU3_MMIO_BAR, val);
-
- NPU3INF(npu, "MMIO base: 0x%016llx (%lldMB)\n", addr, size >> 20);
- npu->regs[0] = addr;
- npu->regs[1] = size;
-
- /* NTL bar (per device) */
- npu3_for_each_dev(dev, npu) {
- phys_map_get(npu->chip_id, NPU_NTL, npu3_chip_dev_index(dev),
- &addr, &size);
- val = SETFIELD(NPU3_NTL_BAR_ADDR, 0ull, addr >> 16);
- val = SETFIELD(NPU3_NTL_BAR_SIZE, val, ilog2(size >> 16));
- npu3_write(npu, NPU3_NTL_BAR(dev->index), val);
-
- dev->ntl_bar.addr = addr;
- dev->ntl_bar.size = size;
- }
-
- /* GENID bar (logically divided per device) */
- phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, NULL);
- val = SETFIELD(NPU3_GENID_BAR_ADDR, 0ull, addr >> 19);
- npu3_write(npu, NPU3_GENID_BAR, val);
-
- npu3_for_each_dev(dev, npu) {
- dev->genid_bar.addr = addr + (dev->index << 16);
- dev->genid_bar.size = 64 << 10;
- }
-}
-
-void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable)
-{
- struct npu3 *npu = dev->npu;
- uint64_t reg, val;
-
- if (dev->ntl_bar.enable == enable) /* No state change */
- return;
-
- dev->ntl_bar.enable = enable;
- dev->genid_bar.enable = enable;
-
- reg = NPU3_NTL_BAR(dev->index);
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_NTL_BAR_ENABLE, val, enable);
- npu3_write(npu, reg, val);
-
- /*
- * Generation IDs are a single space in the hardware but we split them
- * per device. Only disable in hardware if every device has disabled.
- */
- if (!enable)
- npu3_for_each_dev(dev, npu)
- if (dev->genid_bar.enable)
- return;
-
- reg = NPU3_GENID_BAR;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_GENID_BAR_ENABLE, val, enable);
- npu3_write(npu, reg, val);
-}
-
-static uint64_t npu3_ipi_attributes(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
-
- /* TCE interrupt is used to detect a frozen PE */
- if (level == 18)
- return IRQ_ATTR_TARGET_OPAL |
- IRQ_ATTR_TARGET_RARE |
- IRQ_ATTR_TYPE_MSI;
-
- return IRQ_ATTR_TARGET_LINUX;
-}
-
-static void npu3_ipi_interrupt(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
-
- if (level != 18) {
- NPU3ERR(npu, "Received unknown interrupt %d\n", level);
- return;
- }
-
- opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
-}
-
-#define NPU3_IRQ_LEVELS 60
-
-static char *npu3_ipi_name(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
- static const char *names[NPU3_IRQ_LEVELS] = {
- [0] = "NDL 0 Stall Event (brick 0)",
- [1] = "NDL 0 No-Stall Event (brick 0)",
- [2] = "NDL 1 Stall Event (brick 1)",
- [3] = "NDL 1 No-Stall Event (brick 1)",
- [4] = "NDL 2 Stall Event (brick 2)",
- [5] = "NDL 2 No-Stall Event (brick 2)",
- [6] = "NDL 3 Stall Event (brick 3)",
- [7] = "NDL 3 No-Stall Event (brick 3)",
- [8] = "NDL 4 Stall Event (brick 4)",
- [9] = "NDL 4 No-Stall Event (brick 4)",
- [10] = "NDL 5 Stall Event (brick 5)",
- [11] = "NDL 5 No-Stall Event (brick 5)",
- [12] = "NTL 0 Event",
- [13] = "NTL 1 Event",
- [14] = "NTL 2 Event",
- [15] = "NTL 3 Event",
- [16] = "NTL 4 Event",
- [17] = "NTL 5 Event",
- [18] = "TCE Event",
- [19] = "ATS Event",
- [20] = "CQ Event",
- [21] = "MISC Event",
- [41] = "Memory Controller Event",
- [42] = "NDL 6 Stall Event (brick 6)",
- [43] = "NDL 6 No-Stall Event (brick 6)",
- [44] = "NDL 7 Stall Event (brick 7)",
- [45] = "NDL 7 No-Stall Event (brick 7)",
- [46] = "NDL 8 Stall Event (brick 8)",
- [47] = "NDL 8 No-Stall Event (brick 8)",
- [48] = "NDL 9 Stall Event (brick 9)",
- [49] = "NDL 9 No-Stall Event (brick 9)",
- [50] = "NDL 10 Stall Event (brick 10)",
- [51] = "NDL 10 No-Stall Event (brick 10)",
- [52] = "NDL 11 Stall Event (brick 11)",
- [53] = "NDL 11 No-Stall Event (brick 11)",
- [54] = "NTL 6 Event",
- [55] = "NTL 7 Event",
- [56] = "NTL 8 Event",
- [57] = "NTL 9 Event",
- [58] = "NTL 10 Event",
- [59] = "NTL 11 Event",
- };
-
- if (level >= NPU3_IRQ_LEVELS || !names[level])
- return strdup("Unknown");
-
- return strdup(names[level]);
-}
-
-static const struct irq_source_ops npu3_ipi_ops = {
- .attributes = npu3_ipi_attributes,
- .interrupt = npu3_ipi_interrupt,
- .name = npu3_ipi_name,
-};
-
-static void npu3_setup_irqs(struct npu3 *npu)
-{
- uint64_t reg, val;
- uint32_t base;
-
- base = xive_alloc_ipi_irqs(npu->chip_id, NPU3_IRQ_LEVELS, 64);
- if (base == XIVE_IRQ_ERROR) {
- NPU3ERR(npu, "Failed to allocate interrupt sources\n");
- return;
- }
-
- xive_register_ipi_source(base, NPU3_IRQ_LEVELS, npu, &npu3_ipi_ops);
-
- /* Set IPI configuration */
- reg = NPU3_MISC_CFG;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_MISC_CFG_IPI_PS, val, NPU3_MISC_CFG_IPI_PS_64K);
- val = SETFIELD(NPU3_MISC_CFG_IPI_OS, val, NPU3_MISC_CFG_IPI_OS_AIX);
- npu3_write(npu, reg, val);
-
- /* Set IRQ base */
- reg = NPU3_MISC_INT_BAR;
- val = SETFIELD(NPU3_MISC_INT_BAR_ADDR, 0ull,
- (uint64_t)xive_get_trigger_port(base) >> 12);
- npu3_write(npu, reg, val);
-
- npu->irq_base = base;
-}
-
-static void npu3_init(struct npu3 *npu)
-{
- struct npu3_dev *dev;
-
- platform.npu3_device_detect(npu);
- npu3_for_each_dev(dev, npu)
- npu3_device_detect_fixup(dev);
-
- npu3_misc_config(npu);
- npu3_assign_bars(npu);
- npu3_setup_irqs(npu);
- npu3_init_nvlink(npu);
-}
-
-void probe_npu3(void)
-{
- struct dt_node *dn;
- struct npu3 *npu;
-
- if (!npu3_dt_create())
- return;
-
- if (!platform.npu3_device_detect) {
- prlog(PR_INFO, "NPU: Platform does not support NPU\n");
- return;
- }
-
- dt_for_each_compatible(dt_root, dn, "ibm,power9-npu3") {
- npu = npu3_create(dn);
- npu3_init(npu);
- }
-}