diff options
author | Alistair Popple <alistair@popple.id.au> | 2017-06-01 13:46:04 +1000 |
---|---|---|
committer | Stewart Smith <stewart@linux.vnet.ibm.com> | 2017-06-06 17:59:45 +1000 |
commit | b5d85370f669ab4e64f63cdd73c35f16e9d68794 (patch) | |
tree | 3b0aa7fb1d814859044f7011c0387e6c4e5c8b00 /hw | |
parent | bdea201a4c4b691522e04ecb57aff5aefc66cfbb (diff) | |
download | skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.zip skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.gz skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.bz2 |
hw/npu2.c: Add memory coherence directory programming
The memory coherence directory (MCD) needs to know which system memory addresses
belong to the GPUs. This amounts to setting a BAR and a size in the MCD to cover
the addresses assigned to each of the GPUs. To ease assignment, we assume the GPUs
on a chip are assigned memory in a single contiguous block.
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/npu2.c | 39 |
1 files changed, 38 insertions, 1 deletions
@@ -728,13 +728,50 @@ static int64_t npu2_ioda_reset(struct phb *phb, bool purge) static void npu2_hw_init(struct npu2 *p) { - uint64_t val; + int i; + uint64_t val, size, addr, gpu_min_addr, gpu_max_addr, total_size; + struct proc_chip *chip = get_chip(p->chip_id); npu2_ioda_reset(&p->phb, false); /* Enable XTS retry mode */ val = npu2_read(p, NPU2_XTS_CFG); npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO); + + /* Init memory cache directory (MCD) registers. */ + phys_map_get(chip, GPU_MEM, NPU2_LINKS_PER_CHIP - 1, &gpu_min_addr, NULL); + phys_map_get(chip, GPU_MEM, 0, &gpu_max_addr, &size); + gpu_max_addr += size; + + /* We assume GPU memory is contiguous from the first possible GPU to the + * last and that the size is the same so best to check that. */ + for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) { + uint64_t tmp; + phys_map_get(chip, GPU_MEM, i, &addr, &tmp); + assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr)); + assert(tmp == size); + } + + /* We have two MCDs, so if neccessary we can split the region covered + * across both if total_size is not a power of two. */ + total_size = gpu_max_addr - gpu_min_addr; + size = 1ull << ilog2(total_size); + val = PPC_BIT(0); + val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1); + val = SETFIELD(PPC_BITMASK(33, 63), val, gpu_min_addr >> 25); + xscom_write(p->chip_id, MCD0_BANK0_CN3, val); + total_size -= size; + if (total_size) { + /* total_size was not a power of two, but the remainder should + * be if all GPUs were assigned the same size. */ + assert(is_pow2(total_size)); + addr += size; + size = 1ull << ilog2(total_size); + val = PPC_BIT(0); + val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1); + val = SETFIELD(PPC_BITMASK(33, 63), val, addr >> 25); + xscom_write(p->chip_id, MCD1_BANK0_CN3, val); + } } static int64_t npu2_map_pe_dma_window_real(struct phb *phb, |