diff options
-rw-r--r-- | hw/npu2.c | 196 | ||||
-rw-r--r-- | include/npu2.h | 2 |
2 files changed, 198 insertions, 0 deletions
@@ -455,18 +455,209 @@ static void npu2_append_phandle(struct dt_node *dn, unlock(&pci_npu_phandle_lock); } +static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size) +{ + struct dt_node *mem; + char *name; + size_t namesz; + static u32 chip_id = 255; + + /* + * Find and return the node if it already exists. + */ + namesz = sizeof("memory@") + STR_MAX_CHARS(addr); + name = malloc(namesz); + if (!name) + return NULL; + snprintf(name, namesz, "memory@%llx", (long long)addr); + mem = dt_find_by_name(dt_root, name); + free(name); + if (mem) + return mem; + + mem = dt_new_addr(dt_root, "memory", addr); + if (!mem) + return NULL; + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property_string(mem, "compatible", "ibm,coherent-device-memory"); + dt_add_property_u64s(mem, "reg", addr, size); + dt_add_property_cells(mem, "ibm,chip-id", chip_id); + dt_add_property_u64s(mem, "linux,usable-memory", addr, 0); + dt_add_property_cells(mem, "ibm,associativity", 4, 0, 0, 0, chip_id--); + + assert(chip_id); + return mem; +} + +static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, uint64_t gmb) +{ + uint64_t sel, gpu_base, gpu_size, gta; + struct dt_node *mem_dn; + + sel = GETFIELD(NPU2_MEM_BAR_SEL_MEM, gmb); + switch (sel) { + case 0: + /* BAR disabled */ + return; + case 4: + gpu_base = 0; + break; + case 5: + gpu_base = 1UL << 49; + break; + case 6: + gpu_base = 2UL << 49; + break; + default: + prlog(PR_ERR, "unhandled NPU2_MEM_BAR_SEL_MEM 0x%llx\n", sel); + return; + } + + gpu_base |= GETFIELD(NPU2_MEM_BAR_GROUP, gmb) << 43; + gpu_base |= GETFIELD(NPU2_MEM_BAR_CHIP, gmb) << 41; + gpu_base |= GETFIELD(NPU2_MEM_BAR_ADDR, gmb) << 30; + gpu_size = GETFIELD(NPU2_MEM_BAR_BAR_SIZE, gmb) << 32; + + mem_dn = npu2_create_memory_dn(gpu_base, gpu_size); + assert(mem_dn); + dt_add_property_cells(pd_dn, "memory-region", mem_dn->phandle); + + gta = ((gpu_base >> 42) & 0x1) << 41; + gta |= ((gpu_base >> 45) & 0x3) << 42; + gta |= ((gpu_base >> 49) & 0x3) << 45; + gta |= gpu_base & ((1UL << 43) - 1); + + dt_add_property_u64s(pd_dn, "ibm,device-tgt-addr", gta); +} + +/* Used by the below function to assign GPU base addresses */ +static uint64_t npu2_group_addr[NPU2_LINKS_PER_CHIP] = {0}; +static uint64_t npu2_base_addr; +static int npu2_assign_gmb(struct phb *phb, struct pci_device *pd, + void *data __unused) +{ + struct npu2 *p = phb_to_npu2(phb); + struct npu2_dev *ndev; + int group, peers, mode; + uint32_t bdfn; + uint64_t reg, gmb, old_val; + + ndev = npu2_bdf_to_dev(p, pd->bdfn); + assert(ndev); + + /* Assign GPU memory base addresses. The current strategy is + * to work backwards from maximum memory assigning 128GB per + * GPU as that is the minimum alignment requirements. So we + * need to count the number of GPUs and give each a base + * address then configure the hashing based on the number of + * links */ + + /* Need to work out number of link peers. This amount to + * working out the maximum function number. So work start at + * the highest bdfn (fn = 6) and count back until we find a + * npu2_dev. */ + for (bdfn = (ndev->bdfn & ~0x7) | NPU2_LINKS_PER_CHIP; + (bdfn & 0x7) != 0x7; bdfn = (bdfn & ~0x7) | ((bdfn & 0x7) - 1)) + if (npu2_bdf_to_dev(p, bdfn)) + break; + + peers = bdfn & 0x7; + group = (bdfn >> 3) & 0x1f; + + /* These should never happen but would lead to memory + * corruption if they do so best to check. */ + assert(peers != 0x7); + assert(group < NPU2_LINKS_PER_CHIP); + + if (!npu2_group_addr[group]) { + /* Assign new base address */ + npu2_base_addr -= 128; + npu2_group_addr[group] = npu2_base_addr; + } + + gmb = SETFIELD(NPU2_MEM_BAR_SEL_MEM, 0ULL, 4); + gmb = SETFIELD(PPC_BITMASK(2,21), gmb, npu2_group_addr[group]); + gmb = SETFIELD(NPU2_MEM_BAR_POISON, gmb, 1); + gmb = SETFIELD(NPU2_MEM_BAR_GRANULE, gmb, 0); + + /* We don't know how much memory the GPU has but if we + * have to align it to 128GB boundaries we may as well + * just pass the whole aperture through at this + * point. */ + gmb = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, gmb, 7); + + switch (peers) { + case 0: + mode = 0; + break; + case 1: + mode = 1; + break; + case 2: + mode = 3; + break; + case 3: + mode = 6; + break; + case 5: + mode = 10; + break; + default: + /* Hardware does not support this configuration */ + assert(0); + } + + mode += ndev->bdfn & 0x7; + gmb = SETFIELD(NPU2_MEM_BAR_MODE, gmb, mode); + if (NPU2DEV_BRICK(ndev)) + gmb >>= 32; + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev), + NPU2_BLOCK_SM_0, + NPU2_GPU0_MEM_BAR); + + old_val = npu2_read(p, reg); + gmb |= old_val; + + npu2_write(p, reg, gmb); + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev), + NPU2_BLOCK_SM_1, + NPU2_GPU0_MEM_BAR); + npu2_write(p, reg, gmb); + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev), + NPU2_BLOCK_SM_2, + NPU2_GPU0_MEM_BAR); + npu2_write(p, reg, gmb); + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev), + NPU2_BLOCK_SM_3, + NPU2_GPU0_MEM_BAR); + npu2_write(p, reg, gmb); + + return 0; +} + static int npu2_dn_fixup(struct phb *phb, struct pci_device *pd, void *data __unused) { struct npu2 *p = phb_to_npu2(phb); struct npu2_dev *dev; + uint64_t reg, gmb; dev = npu2_bdf_to_dev(p, pd->bdfn); assert(dev); if (dev->phb || dev->pd) return 0; + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), + NPU2_BLOCK_SM_0, + NPU2_GPU0_MEM_BAR); + gmb = npu2_read(p, reg); + if (NPU2DEV_BRICK(dev)) + gmb <<= 32; + else + gmb &= 0xffffffff00000000; + + npu2_dn_fixup_gmb(pd->dn, gmb); dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle); /* NPU devices require a slot location to associate with GPUs */ @@ -503,6 +694,11 @@ static int npu2_dn_fixup(struct phb *phb, static void npu2_phb_final_fixup(struct phb *phb) { + struct npu2 *p = phb_to_npu2(phb); + + /* Start allocating GPU memory from 4TB down. */ + npu2_base_addr = (p->chip_id << 21) | 4*1024; + pci_walk_dev(phb, NULL, npu2_assign_gmb, NULL); pci_walk_dev(phb, NULL, npu2_dn_fixup, NULL); } diff --git a/include/npu2.h b/include/npu2.h index ec62ad2..d535e65 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -29,6 +29,8 @@ #define NPU2_MAX_PE_NUM 16 #define NPU2_RESERVED_PE_NUM 15 +#define NPU2_LINKS_PER_CHIP 6 + /* Return the stack (0-2) of a device */ #define NPU2DEV_STACK(ndev) ((ndev)->index / 2) |