aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/npu2.c196
-rw-r--r--include/npu2.h2
2 files changed, 198 insertions, 0 deletions
diff --git a/hw/npu2.c b/hw/npu2.c
index ada6b38..0215570 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -455,18 +455,209 @@ static void npu2_append_phandle(struct dt_node *dn,
unlock(&pci_npu_phandle_lock);
}
+static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
+{
+ struct dt_node *mem;
+ char *name;
+ size_t namesz;
+ static u32 chip_id = 255;
+
+ /*
+ * Find and return the node if it already exists.
+ */
+ namesz = sizeof("memory@") + STR_MAX_CHARS(addr);
+ name = malloc(namesz);
+ if (!name)
+ return NULL;
+ snprintf(name, namesz, "memory@%llx", (long long)addr);
+ mem = dt_find_by_name(dt_root, name);
+ free(name);
+ if (mem)
+ return mem;
+
+ mem = dt_new_addr(dt_root, "memory", addr);
+ if (!mem)
+ return NULL;
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property_string(mem, "compatible", "ibm,coherent-device-memory");
+ dt_add_property_u64s(mem, "reg", addr, size);
+ dt_add_property_cells(mem, "ibm,chip-id", chip_id);
+ dt_add_property_u64s(mem, "linux,usable-memory", addr, 0);
+ dt_add_property_cells(mem, "ibm,associativity", 4, 0, 0, 0, chip_id--);
+
+ assert(chip_id);
+ return mem;
+}
+
+static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, uint64_t gmb)
+{
+ uint64_t sel, gpu_base, gpu_size, gta;
+ struct dt_node *mem_dn;
+
+ sel = GETFIELD(NPU2_MEM_BAR_SEL_MEM, gmb);
+ switch (sel) {
+ case 0:
+ /* BAR disabled */
+ return;
+ case 4:
+ gpu_base = 0;
+ break;
+ case 5:
+ gpu_base = 1UL << 49;
+ break;
+ case 6:
+ gpu_base = 2UL << 49;
+ break;
+ default:
+ prlog(PR_ERR, "unhandled NPU2_MEM_BAR_SEL_MEM 0x%llx\n", sel);
+ return;
+ }
+
+ gpu_base |= GETFIELD(NPU2_MEM_BAR_GROUP, gmb) << 43;
+ gpu_base |= GETFIELD(NPU2_MEM_BAR_CHIP, gmb) << 41;
+ gpu_base |= GETFIELD(NPU2_MEM_BAR_ADDR, gmb) << 30;
+ gpu_size = GETFIELD(NPU2_MEM_BAR_BAR_SIZE, gmb) << 32;
+
+ mem_dn = npu2_create_memory_dn(gpu_base, gpu_size);
+ assert(mem_dn);
+ dt_add_property_cells(pd_dn, "memory-region", mem_dn->phandle);
+
+ gta = ((gpu_base >> 42) & 0x1) << 41;
+ gta |= ((gpu_base >> 45) & 0x3) << 42;
+ gta |= ((gpu_base >> 49) & 0x3) << 45;
+ gta |= gpu_base & ((1UL << 43) - 1);
+
+ dt_add_property_u64s(pd_dn, "ibm,device-tgt-addr", gta);
+}
+
+/* Used by the below function to assign GPU base addresses */
+static uint64_t npu2_group_addr[NPU2_LINKS_PER_CHIP] = {0};
+static uint64_t npu2_base_addr;
+static int npu2_assign_gmb(struct phb *phb, struct pci_device *pd,
+ void *data __unused)
+{
+ struct npu2 *p = phb_to_npu2(phb);
+ struct npu2_dev *ndev;
+ int group, peers, mode;
+ uint32_t bdfn;
+ uint64_t reg, gmb, old_val;
+
+ ndev = npu2_bdf_to_dev(p, pd->bdfn);
+ assert(ndev);
+
+ /* Assign GPU memory base addresses. The current strategy is
+ * to work backwards from maximum memory assigning 128GB per
+ * GPU as that is the minimum alignment requirements. So we
+ * need to count the number of GPUs and give each a base
+ * address then configure the hashing based on the number of
+ * links */
+
+ /* Need to work out number of link peers. This amount to
+ * working out the maximum function number. So work start at
+ * the highest bdfn (fn = 6) and count back until we find a
+ * npu2_dev. */
+ for (bdfn = (ndev->bdfn & ~0x7) | NPU2_LINKS_PER_CHIP;
+ (bdfn & 0x7) != 0x7; bdfn = (bdfn & ~0x7) | ((bdfn & 0x7) - 1))
+ if (npu2_bdf_to_dev(p, bdfn))
+ break;
+
+ peers = bdfn & 0x7;
+ group = (bdfn >> 3) & 0x1f;
+
+ /* These should never happen but would lead to memory
+ * corruption if they do so best to check. */
+ assert(peers != 0x7);
+ assert(group < NPU2_LINKS_PER_CHIP);
+
+ if (!npu2_group_addr[group]) {
+ /* Assign new base address */
+ npu2_base_addr -= 128;
+ npu2_group_addr[group] = npu2_base_addr;
+ }
+
+ gmb = SETFIELD(NPU2_MEM_BAR_SEL_MEM, 0ULL, 4);
+ gmb = SETFIELD(PPC_BITMASK(2,21), gmb, npu2_group_addr[group]);
+ gmb = SETFIELD(NPU2_MEM_BAR_POISON, gmb, 1);
+ gmb = SETFIELD(NPU2_MEM_BAR_GRANULE, gmb, 0);
+
+ /* We don't know how much memory the GPU has but if we
+ * have to align it to 128GB boundaries we may as well
+ * just pass the whole aperture through at this
+ * point. */
+ gmb = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, gmb, 7);
+
+ switch (peers) {
+ case 0:
+ mode = 0;
+ break;
+ case 1:
+ mode = 1;
+ break;
+ case 2:
+ mode = 3;
+ break;
+ case 3:
+ mode = 6;
+ break;
+ case 5:
+ mode = 10;
+ break;
+ default:
+ /* Hardware does not support this configuration */
+ assert(0);
+ }
+
+ mode += ndev->bdfn & 0x7;
+ gmb = SETFIELD(NPU2_MEM_BAR_MODE, gmb, mode);
+ if (NPU2DEV_BRICK(ndev))
+ gmb >>= 32;
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
+ NPU2_BLOCK_SM_0,
+ NPU2_GPU0_MEM_BAR);
+
+ old_val = npu2_read(p, reg);
+ gmb |= old_val;
+
+ npu2_write(p, reg, gmb);
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
+ NPU2_BLOCK_SM_1,
+ NPU2_GPU0_MEM_BAR);
+ npu2_write(p, reg, gmb);
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
+ NPU2_BLOCK_SM_2,
+ NPU2_GPU0_MEM_BAR);
+ npu2_write(p, reg, gmb);
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
+ NPU2_BLOCK_SM_3,
+ NPU2_GPU0_MEM_BAR);
+ npu2_write(p, reg, gmb);
+
+ return 0;
+}
+
static int npu2_dn_fixup(struct phb *phb,
struct pci_device *pd,
void *data __unused)
{
struct npu2 *p = phb_to_npu2(phb);
struct npu2_dev *dev;
+ uint64_t reg, gmb;
dev = npu2_bdf_to_dev(p, pd->bdfn);
assert(dev);
if (dev->phb || dev->pd)
return 0;
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev),
+ NPU2_BLOCK_SM_0,
+ NPU2_GPU0_MEM_BAR);
+ gmb = npu2_read(p, reg);
+ if (NPU2DEV_BRICK(dev))
+ gmb <<= 32;
+ else
+ gmb &= 0xffffffff00000000;
+
+ npu2_dn_fixup_gmb(pd->dn, gmb);
dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle);
/* NPU devices require a slot location to associate with GPUs */
@@ -503,6 +694,11 @@ static int npu2_dn_fixup(struct phb *phb,
static void npu2_phb_final_fixup(struct phb *phb)
{
+ struct npu2 *p = phb_to_npu2(phb);
+
+ /* Start allocating GPU memory from 4TB down. */
+ npu2_base_addr = (p->chip_id << 21) | 4*1024;
+ pci_walk_dev(phb, NULL, npu2_assign_gmb, NULL);
pci_walk_dev(phb, NULL, npu2_dn_fixup, NULL);
}
diff --git a/include/npu2.h b/include/npu2.h
index ec62ad2..d535e65 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -29,6 +29,8 @@
#define NPU2_MAX_PE_NUM 16
#define NPU2_RESERVED_PE_NUM 15
+#define NPU2_LINKS_PER_CHIP 6
+
/* Return the stack (0-2) of a device */
#define NPU2DEV_STACK(ndev) ((ndev)->index / 2)