hw/npu2.c: Add memory coherence directory programming

The memory coherence directory (MCD) needs to know which system memory addresses belong to the GPU. This amounts to setting a BAR and a size in the MCD to cover the addresses assigned to each of the GPUs. To ease assignment we assume GPUs are assigned memory in a contiguous block per chip.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
author: Alistair Popple <alistair@popple.id.au> 2017-06-01 13:46:04 +1000
committer: Stewart Smith <stewart@linux.vnet.ibm.com> 2017-06-06 17:59:45 +1000
commit: b5d85370f669ab4e64f63cdd73c35f16e9d68794 (patch)
tree: 3b0aa7fb1d814859044f7011c0387e6c4e5c8b00 /hw
parent: bdea201a4c4b691522e04ecb57aff5aefc66cfbb (diff)
download: skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.zip
skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.gz
skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.bz2
1 files changed, 38 insertions, 1 deletions
diff --git a/hw/npu2.c b/hw/npu2.c
index 4e08db4..dc6eb54 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -728,13 +728,50 @@ static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
 
 static void npu2_hw_init(struct npu2 *p)
 {
-	uint64_t val;
+	int i;
+	uint64_t val, size, addr, gpu_min_addr, gpu_max_addr, total_size;
+	struct proc_chip *chip = get_chip(p->chip_id);
 
 	npu2_ioda_reset(&p->phb, false);
 
 	/* Enable XTS retry mode */
 	val = npu2_read(p, NPU2_XTS_CFG);
 	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO);
+
+	/* Init memory cache directory (MCD) registers. */
+	phys_map_get(chip, GPU_MEM, NPU2_LINKS_PER_CHIP - 1, &gpu_min_addr, NULL);
+	phys_map_get(chip, GPU_MEM, 0, &gpu_max_addr, &size);
+	gpu_max_addr += size;
+
+	/* We assume GPU memory is contiguous from the first possible GPU to the
+	 * last and that the size is the same so best to check that. */
+	for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
+		uint64_t tmp;
+		phys_map_get(chip, GPU_MEM, i, &addr, &tmp);
+		assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
+		assert(tmp == size);
+	}
+
+	/* We have two MCDs, so if neccessary we can split the region covered
+	 * across both if total_size is not a power of two. */
+	total_size = gpu_max_addr - gpu_min_addr;
+	size = 1ull << ilog2(total_size);
+	val = PPC_BIT(0);
+	val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1);
+	val = SETFIELD(PPC_BITMASK(33, 63), val, gpu_min_addr >> 25);
+	xscom_write(p->chip_id, MCD0_BANK0_CN3, val);
+	total_size -= size;
+	if (total_size) {
+	/* total_size was not a power of two, but the remainder should
+	 * be if all GPUs were assigned the same size. */
+		assert(is_pow2(total_size));
+		addr += size;
+		size = 1ull << ilog2(total_size);
+		val = PPC_BIT(0);
+		val = SETFIELD(PPC_BITMASK(13, 29), val, (size >> 25) - 1);
+		val = SETFIELD(PPC_BITMASK(33, 63), val, addr >> 25);
+		xscom_write(p->chip_id, MCD1_BANK0_CN3, val);
+	}
 }
 
 static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
author	Alistair Popple <alistair@popple.id.au>	2017-06-01 13:46:04 +1000
committer	Stewart Smith <stewart@linux.vnet.ibm.com>	2017-06-06 17:59:45 +1000
commit	b5d85370f669ab4e64f63cdd73c35f16e9d68794 (patch)
tree	3b0aa7fb1d814859044f7011c0387e6c4e5c8b00 /hw
parent	bdea201a4c4b691522e04ecb57aff5aefc66cfbb (diff)
download	skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.zip skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.gz skiboot-b5d85370f669ab4e64f63cdd73c35f16e9d68794.tar.bz2