From 7ecb29651c311e35e30fbbebc675cae3e97c8d53 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 14 Sep 2018 13:46:23 +1000 Subject: npu2: Split device index into brick and link index On Witherspoon, OpenCAPI devices attached to link indexes 0 and 1 are handled by bricks 2 and 3. Rename index to brick_index, and add a new field, link_index, to refer to the link index. For now, we set those values identically. Signed-off-by: Andrew Donnellan Acked-by: Reza Arbab Reviewed-by: Alistair Popple Reviewed-by: Frederic Barrat Signed-off-by: Stewart Smith --- hw/npu2-hw-procedures.c | 8 ++--- hw/npu2-opencapi.c | 77 +++++++++++++++++++++++++------------------------ hw/npu2.c | 21 ++++++++------ include/npu2-regs.h | 14 ++++----- include/npu2.h | 7 +++-- 5 files changed, 67 insertions(+), 60 deletions(-) diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c index c30e1b0..fa64075 100644 --- a/hw/npu2-hw-procedures.c +++ b/hw/npu2-hw-procedures.c @@ -199,7 +199,7 @@ DEFINE_PROCEDURE(nop); /* Return the brick number (0-2) within an obus chiplet */ static int obus_brick_index(struct npu2_dev *ndev) { - int index = ndev->index % 3; + int index = ndev->brick_index % 3; /* On the second obus chiplet, index is reversed */ if ((ndev->pl_xscom_base & 0x3F000000) != 0x09000000) @@ -433,7 +433,7 @@ DEFINE_PROCEDURE(phy_reset, phy_reset_wait, phy_reset_complete); /* Procedure 1.2.6 - I/O PHY Tx Impedance Calibration */ static uint32_t phy_tx_zcal(struct npu2_dev *ndev) { - if (ndev->npu->tx_zcal_complete[ndev->index > 2]) + if (ndev->npu->tx_zcal_complete[ndev->brick_index > 2]) return PROCEDURE_COMPLETE; /* Turn off SW enable and enable zcal state machine */ @@ -604,7 +604,7 @@ static uint32_t phy_tx_zcal_calculate(struct npu2_dev *ndev) phy_write(ndev, &NPU2_PHY_TX_MARGINPU_SELECT, therm(margin_select + 1)/2); phy_write(ndev, &NPU2_PHY_TX_MARGINPD_SELECT, therm(margin_select + 1)/2); - ndev->npu->tx_zcal_complete[ndev->index > 2] = 1; + ndev->npu->tx_zcal_complete[ndev->brick_index > 2] = 1; return PROCEDURE_COMPLETE; } DEFINE_PROCEDURE(phy_tx_zcal, phy_tx_zcal_wait, phy_tx_zcal_calculate); @@ -978,7 +978,7 @@ void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev) uint64_t status_xscom; int lane, bit = 7; - switch (dev->index) { + switch (dev->brick_index) { case 2: status_xscom = OB0_ODL0_TRAINING_STATUS; break; diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index 4d30aa6..c916b2e 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -53,11 +53,11 @@ #include #define OCAPIDBG(dev, fmt, a...) prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \ - dev->npu->chip_id, dev->index, ## a) + dev->npu->chip_id, dev->brick_index, ## a) #define OCAPIINF(dev, fmt, a...) prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \ - dev->npu->chip_id, dev->index, ## a) + dev->npu->chip_id, dev->brick_index, ## a) #define OCAPIERR(dev, fmt, a...) prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \ - dev->npu->chip_id, dev->index, ## a) + dev->npu->chip_id, dev->brick_index, ## a) #define NPU_IRQ_LEVELS 35 @@ -753,16 +753,16 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base, static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base, struct npu2_dev *dev) { - uint64_t stack = index_to_stack(dev->index); - uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? + uint64_t stack = index_to_stack(dev->brick_index); + uint64_t offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ? NPU2_NTL0_BAR : NPU2_NTL1_BAR; - uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? + uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ? NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG : NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG; uint64_t addr, size, reg; prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__); - phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size); + phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size); prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size); write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr, @@ -783,7 +783,7 @@ static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base, static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base, struct npu2_dev *dev) { - uint64_t stack = index_to_stack(dev->index); + uint64_t stack = index_to_stack(dev->brick_index); int stack_num = stack - NPU2_STACK_STCK_0; uint64_t addr, size; @@ -799,8 +799,8 @@ static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base, static void otl_enabletx(uint32_t gcid, uint32_t scom_base, struct npu2_dev *dev) { - uint64_t stack = index_to_stack(dev->index); - uint64_t block = index_to_block(dev->index); + uint64_t stack = index_to_stack(dev->brick_index); + uint64_t block = index_to_block(dev->brick_index); uint64_t reg; /* OTL Config 2 Register */ @@ -822,7 +822,7 @@ static void assert_reset(struct npu2_dev *dev) uint8_t pin, data; int rc; - switch (dev->index) { + switch (dev->brick_index) { case 2: case 4: pin = platform.ocapi->i2c_reset_odl0; @@ -910,7 +910,7 @@ static bool i2c_presence_detect(struct npu2_dev *dev) OCAPIDBG(dev, "I2C presence detect: 0x%x\n", state); - switch (dev->index) { + switch (dev->brick_index) { // TODO(ajd): Link or brick index? case 2: data = platform.ocapi->i2c_presence_odl0; break; @@ -929,7 +929,7 @@ static void reset_odl(uint32_t gcid, struct npu2_dev *dev) { uint64_t reg, config_xscom; - switch (dev->index) { + switch (dev->brick_index) { case 2: config_xscom = OB0_ODL0_CONFIG; break; @@ -965,7 +965,7 @@ static void set_init_pattern(uint32_t gcid, struct npu2_dev *dev) { uint64_t reg, config_xscom; - switch (dev->index) { + switch (dev->brick_index) { case 2: config_xscom = OB0_ODL0_CONFIG; break; @@ -992,7 +992,7 @@ static void start_training(uint32_t gcid, struct npu2_dev *dev) { uint64_t reg, config_xscom; - switch (dev->index) { + switch (dev->brick_index) { case 2: config_xscom = OB0_ODL0_CONFIG; break; @@ -1035,7 +1035,7 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val) uint64_t reg; int64_t link_width, training_status, rc = OPAL_SUCCESS; - reg = get_odl_status(dev->npu->chip_id, dev->index); + reg = get_odl_status(dev->npu->chip_id, dev->brick_index); link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg); training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg); @@ -1071,7 +1071,7 @@ static int64_t npu2_opencapi_retry_state(struct pci_slot *slot) */ OCAPIERR(dev, "Link failed to train, final link status: %016llx\n", - get_odl_status(chip_id, dev->index)); + get_odl_status(chip_id, dev->brick_index)); return OPAL_HARDWARE; } @@ -1093,7 +1093,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot) pci_slot_set_state(slot, OCAPI_SLOT_LINK_WAIT); /* fall-through */ case OCAPI_SLOT_LINK_WAIT: - reg = get_odl_status(chip_id, dev->index); + reg = get_odl_status(chip_id, dev->brick_index); if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == OCAPI_LINK_STATE_TRAINED) { OCAPIINF(dev, "link trained in %lld ms\n", @@ -1153,9 +1153,9 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot) if (dev->train_need_fence) { OCAPIDBG(dev, "Fencing OTL during reset\n"); set_fence_control(chip_id, dev->npu->xscom_base, - dev->index, 0b11); + dev->brick_index, 0b11); npu2_write(dev->npu, NPU2_MISC_FENCE_STATE, - PPC_BIT(dev->index + 6)); + PPC_BIT(dev->brick_index + 6)); dev->train_fenced = true; } dev->train_need_fence = true; @@ -1180,9 +1180,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot) case OCAPI_SLOT_FRESET_DEASSERT_DELAY: if (dev->train_fenced) { OCAPIDBG(dev, "Unfencing OTL after reset\n"); - npu2_write(dev->npu, NPU2_MISC_FENCE_STATE, PPC_BIT(dev->index)); + npu2_write(dev->npu, NPU2_MISC_FENCE_STATE, + PPC_BIT(dev->brick_index)); set_fence_control(chip_id, dev->npu->xscom_base, - dev->index, 0b00); + dev->brick_index, 0b00); dev->train_fenced = false; } @@ -1263,7 +1264,7 @@ static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn, return rc; genid_base = dev->bars[1].npu2_bar.base + - (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0); + (index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0); cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | @@ -1321,7 +1322,7 @@ static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn, return rc; genid_base = dev->bars[1].npu2_bar.base + - (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0); + (index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0); cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | @@ -1409,13 +1410,14 @@ static int64_t npu2_opencapi_set_pe(struct phb *phb, p = dev->npu; pe_bdfn = dev->bdfn; - + val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE; val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num); val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn); reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, - NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18)); - p->bdf2pe_cache[dev->index] = val; + NPU2_MISC_BRICK0_BDF2PE_MAP0 + + (dev->brick_index * 0x18)); + p->bdf2pe_cache[dev->brick_index] = val; npu2_write(p, reg, val); return OPAL_SUCCESS; @@ -1426,8 +1428,8 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd, { uint32_t irq; struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); - uint64_t block = index_to_block(dev->index); - uint64_t stacku = index_to_stacku(dev->index); + uint64_t block = index_to_block(dev->brick_index); + uint64_t stacku = index_to_stacku(dev->brick_index); uint64_t dsisr, dar, tfc, handle; /* @@ -1631,7 +1633,8 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, dev->phb_ocapi.ops = &npu2_opencapi_ops; dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi; dev->phb_ocapi.scan_map = 0; - dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); + dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); + dev->brick_index = dev->link_index; dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy"); dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask"); dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed"); @@ -1664,7 +1667,7 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, /* Procedure 13.1.3.9 - AFU Config BARs */ setup_afu_config_bars(n->chip_id, n->xscom_base, dev); - set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00); + set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00); if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) { setup_debug_training_state(dev); @@ -1847,8 +1850,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn, if (!dev) return OPAL_PARAMETER; - block = index_to_block(dev->index); - stack = index_to_stack(dev->index); + block = index_to_block(dev->brick_index); + stack = index_to_stack(dev->brick_index); if (block == NPU2_BLOCK_OTL1) offset = NPU2_XSL_PSL_SPAP_A1; else @@ -1912,8 +1915,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn, if (!dev) return OPAL_PARAMETER; - block = index_to_block(dev->index); - stack = index_to_stack(dev->index); + block = index_to_block(dev->brick_index); + stack = index_to_stack(dev->brick_index); cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0); lock(&dev->npu->lock); @@ -1987,8 +1990,8 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn, if (!dev) return OPAL_PARAMETER; - block = index_to_block(dev->index); - stack = index_to_stack(dev->index); + block = index_to_block(dev->brick_index); + stack = index_to_stack(dev->brick_index); /* * The 'capabilities' argument defines what TL template the * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so diff --git a/hw/npu2.c b/hw/npu2.c index 8cee628..3672638 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -1075,10 +1075,10 @@ static int64_t npu2_set_pe(struct phb *phb, val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); if (!NPU2DEV_BRICK(dev)) - reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2, NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0); else - reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2, NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0); npu2_write(p, reg, val); @@ -1086,8 +1086,8 @@ static int64_t npu2_set_pe(struct phb *phb, val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num); val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, - NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18)); - p->bdf2pe_cache[dev->index] = val; + NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->brick_index * 0x18)); + p->bdf2pe_cache[dev->brick_index] = val; npu2_write(p, reg, val); return OPAL_SUCCESS; @@ -1601,7 +1601,7 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev, NULL); /* Link index */ - PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index); + PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->link_index); return start + VENDOR_CAP_LEN; } @@ -1725,7 +1725,8 @@ static void npu2_populate_devices(struct npu2 *p, dev->type = NPU2_DEV_TYPE_NVLINK; dev->npu = p; dev->dt_node = link; - dev->index = dt_prop_get_u32(link, "ibm,npu-link-index"); + dev->link_index = dt_prop_get_u32(link, "ibm,npu-link-index"); + dev->brick_index = dev->link_index; group_id = dt_prop_get_u32(link, "ibm,npu-group-id"); dev->bdfn = npu_allocate_bdfn(p, group_id); @@ -1742,7 +1743,7 @@ static void npu2_populate_devices(struct npu2 *p, stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev); npu2_bar = &dev->bars[0].npu2_bar; npu2_bar->type = NPU_NTL; - npu2_bar->index = dev->index; + npu2_bar->index = dev->brick_index; npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ? NPU2_NTL0_BAR : NPU2_NTL1_BAR); npu2_get_bar(p->chip_id, npu2_bar); @@ -2248,8 +2249,10 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, goto out; } - xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar, 0x4 >> (ndev->index / 2)); - xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar, (ndev->index % 2)); + xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar, + 0x4 >> (ndev->brick_index / 2)); + xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar, + (ndev->brick_index % 2)); NPU2DBG(p, "XTS_BDF_MAP[%03d] = 0x%08llx\n", id, xts_bdf_lpar); npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf_lpar); diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 6bd77e4..8c1ba5f 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -44,19 +44,19 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, (((stack) << 20) | ((block) << 16) | (offset)) #define NPU2_NTL_REG_OFFSET(ndev, offset) \ - NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1), \ - NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset) + NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1), \ + NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset) #define NPU2_NTLU_REG_OFFSET(ndev, offset) \ - NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->index >> 1), \ - NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset) + NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->brick_index >> 1), \ + NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset) #define NPU2_DL_REG_OFFSET(ndev, offset) \ - NPU2_REG_OFFSET(((ndev)->index >> 1), \ - 8 + ((ndev)->index % 2)*2, offset) + NPU2_REG_OFFSET(((ndev)->brick_index >> 1), \ + 8 + ((ndev)->brick_index % 2)*2, offset) #define NPU2_SM_REG_OFFSET(ndev, sm, offset) \ - NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1), \ + NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1), \ NPU2_BLOCK_SM_0 + (sm), offset) /* Get the offset for this register */ diff --git a/include/npu2.h b/include/npu2.h index 4c2e20e..1074203 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -47,10 +47,10 @@ #define NPU2_DEV_DL_RESET 0x2 /* Return the stack (0-2) of a device */ -#define NPU2DEV_STACK(ndev) ((ndev)->index / 2) +#define NPU2DEV_STACK(ndev) ((ndev)->brick_index / 2) /* Return the brick number (0-1) within a stack */ -#define NPU2DEV_BRICK(ndev) ((ndev)->index % 2) +#define NPU2DEV_BRICK(ndev) ((ndev)->brick_index % 2) /* This represents the state of the actual hardware BARs not the * emulated PCIe BARs. The is a subtle difference between the two as @@ -111,7 +111,8 @@ struct npu2_dev_nvlink { struct npu2_dev { enum npu2_dev_type type; - uint32_t index; + uint32_t link_index; + uint32_t brick_index; uint64_t pl_xscom_base; struct dt_node *dt_node; struct npu2_pcie_bar bars[2]; -- cgit v1.1