aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorAlistair Popple <alistair@popple.id.au>2017-06-01 13:46:04 +1000
committerStewart Smith <stewart@linux.vnet.ibm.com>2017-06-06 17:59:45 +1000
commitb5d85370f669ab4e64f63cdd73c35f16e9d68794 (patch)
tree3b0aa7fb1d814859044f7011c0387e6c4e5c8b00 /hw
parentbdea201a4c4b691522e04ecb57aff5aefc66cfbb (diff)
downloadskiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.zip
skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.gz
skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.bz2
hw/npu2.c: Add memory coherence directory programming
The memory coherence directory (MCD) needs to know which system memory addresses belong to the GPU. This amounts to setting a BAR and a size in the MCD to cover the addresses assigned to each of the GPUs. To ease assignment we assume GPUs are assigned memory in a contiguous block per chip.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/npu2.c39
1 files changed, 38 insertions, 1 deletions
diff --git a/hw/npu2.c b/hw/npu2.c
index 4e08db4..dc6eb54 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -728,13 +728,50 @@ static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
static void npu2_hw_init(struct npu2 *p)
{
- uint64_t val;
+ int i;
+ uint64_t val, size, addr, gpu_min_addr, gpu_max_addr, total_size;
+ struct proc_chip *chip = get_chip(p->chip_id);
npu2_ioda_reset(&p->phb, false);
/* Enable XTS retry mode */
val = npu2_read(p, NPU2_XTS_CFG);
npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO);
+
+ /* Init memory coherence directory (MCD) registers.
+ *
+ * First work out the address range covering every GPU_MEM window on
+ * this chip. The lower bound is read from the highest link index and
+ * the upper bound is the end of index 0's window.
+ * NOTE(review): this presumably relies on the physical map placing
+ * higher GPU_MEM indices at lower addresses - confirm against
+ * phys_map_get(). */
+ phys_map_get(chip, GPU_MEM, NPU2_LINKS_PER_CHIP - 1, &gpu_min_addr, NULL);
+ phys_map_get(chip, GPU_MEM, 0, &gpu_max_addr, &size);
+ gpu_max_addr += size;
+
+ /* We assume GPU memory is contiguous from the first possible GPU to the
+ * last and that the size is the same so best to check that.
+ *
+ * Every window must lie inside [gpu_min_addr, gpu_max_addr] and have
+ * the same size as index 0's window. Note the loop leaves addr holding
+ * the base of the last index, which is used again further down. */
+ for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
+ uint64_t tmp;
+ phys_map_get(chip, GPU_MEM, i, &addr, &tmp);
+ assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
+ assert(tmp == size);
+ }
+
+ /* We have two MCDs, so if necessary we can split the region covered
+ * across both if total_size is not a power of two.
+ *
+ * MCD0 gets a power-of-two sized region starting at gpu_min_addr
+ * (presumably ilog2() rounds down, making it the largest power of two
+ * that fits in total_size - TODO confirm). The value written holds
+ * (size >> 25) - 1 in bits 13:29 and gpu_min_addr >> 25 in bits 33:63,
+ * i.e. both are expressed in units of 2^25 bytes (32MB).
+ * NOTE(review): PPC_BIT(0) looks like a valid/enable bit and the two
+ * fields like size and base - confirm against the MCD register
+ * documentation. Nothing here checks that gpu_min_addr is aligned to
+ * size, should the hardware require that. */
+ total_size = gpu_max_addr - gpu_min_addr;
+ size = 1ull << ilog2(total_size);
+ val = PPC_BIT(0);
+ val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1);
+ val = SETFIELD(PPC_BITMASK(33, 63), val, gpu_min_addr >> 25);
+ xscom_write(p->chip_id, MCD0_BANK0_CN3, val);
+ total_size -= size;
+ if (total_size) {
+ /* total_size was not a power of two, but the remainder should
+ * be if all GPUs were assigned the same size.
+ *
+ * addr still holds the value from the last iteration of the
+ * loop above (index NPU2_LINKS_PER_CHIP - 1, the same index
+ * gpu_min_addr was read from), so adding the first region's
+ * size yields the first byte after the region programmed into
+ * MCD0. The remainder is then programmed into MCD1 using the
+ * same field layout. */
+ assert(is_pow2(total_size));
+ addr += size;
+ size = 1ull << ilog2(total_size);
+ val = PPC_BIT(0);
+ val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1);
+ val = SETFIELD(PPC_BITMASK(33, 63), val, addr >> 25);
+ xscom_write(p->chip_id, MCD1_BANK0_CN3, val);
+ }
}
static int64_t npu2_map_pe_dma_window_real(struct phb *phb,