aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst (renamed from doc/opal-api/opal-npu2-opencapi-159-160-161.rst)77
-rw-r--r--hw/npu2-opencapi.c181
-rw-r--r--hw/phys-map.c11
-rw-r--r--include/npu2-regs.h7
-rw-r--r--include/npu2.h11
-rw-r--r--include/opal-api.h4
-rw-r--r--include/phys-map.h1
7 files changed, 288 insertions, 4 deletions
diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst
index 4db3d3e..6d603f1 100644
--- a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
+++ b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst
@@ -124,3 +124,80 @@ OPAL_SUCCESS
OPAL_PARAMETER
A provided parameter was invalid
+
+.. _OPAL_NPU_MEM_ALLOC:
+
+OPAL_NPU_MEM_ALLOC
+==================
+
+OpenCAPI devices only.
+
+Sets up the NPU memory BAR for Lowest Point of Coherency (LPC) memory.
+
+At present, only one device per CPU can use LPC memory, and a maximum of 4TB
+can be allocated.
+
+Parameters
+----------
+::
+
+ uint64_t phb_id
+ uint32_t bdfn
+ uint64_t size
+ uint64_t *bar
+
+``phb_id``
+ OPAL ID of PHB
+
+``bdfn``
+ Bus-Device-Function number of OpenCAPI AFU
+
+``size``
+ Size of requested LPC memory area in bytes. The firmware rounds this up to a
+ power of two, with a minimum of 1GB, when setting up the BAR.
+
+``bar``
+ Pointer to variable where base of LPC memory area will be returned
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+ BAR setup completed successfully
+
+OPAL_PARAMETER
+ A provided parameter was invalid
+
+OPAL_RESOURCE
+ The BAR could not be assigned because a device on the same chip already has
+ LPC memory allocated
+
+.. _OPAL_NPU_MEM_RELEASE:
+
+OPAL_NPU_MEM_RELEASE
+====================
+
+OpenCAPI devices only.
+
+Releases NPU memory BAR.
+
+Parameters
+----------
+::
+
+ uint64_t phb_id
+ uint32_t bdfn
+
+``phb_id``
+ OPAL ID of PHB
+
+``bdfn``
+ Bus-Device-Function number of OpenCAPI AFU
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+ BAR released successfully
+
+OPAL_PARAMETER
+ A provided parameter was invalid, or the specified device does not currently
+ have LPC memory assigned
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 27dfc12..7a90cfa 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -2025,3 +2025,184 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn,
return OPAL_SUCCESS;
}
opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
+
+/*
+ * Program, or disable, the memory BAR and the matching PA config register
+ * for the stack that dev's brick belongs to.
+ *
+ * base and size must either both be zero (BAR disabled) or both be
+ * non-zero (BAR enabled); anything else trips the assertion below.
+ * Only brick indexes 2-5 are accepted.
+ *
+ * The same BAR value is written to each of the SM_0..SM_3 blocks, then
+ * the PA config value is written to the CTL block.
+ */
+static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
+{
+ uint64_t stack, val, reg, bar_offset, pa_config_offset;
+ uint8_t memsel;
+
+ stack = index_to_stack(dev->brick_index);
+ switch (dev->brick_index) {
+ case 2:
+ case 4:
+ bar_offset = NPU2_GPU0_MEM_BAR;
+ pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
+ break;
+ case 3:
+ case 5:
+ bar_offset = NPU2_GPU1_MEM_BAR;
+ pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
+ break;
+ default:
+ assert(false);
+ }
+
+ assert((!size && !base) || (size && base));
+
+ /*
+ * Memory select configuration:
+ * - 0b000 - BAR disabled
+ * - 0b001 - match 0b00, 0b01
+ * - 0b010 - match 0b01, 0b10
+ * - 0b011 - match 0b00, 0b10
+ * - 0b100 - match 0b00
+ * - 0b101 - match 0b01
+ * - 0b110 - match 0b10
+ * - 0b111 - match 0b00, 0b01, 0b10
+ *
+ * memsel is taken from the PPC_BITMASK(13, 14) field of base, and
+ * 0b100 + memsel is what gets programmed when the BAR is enabled.
+ */
+ memsel = GETFIELD(PPC_BITMASK(13, 14), base);
+ if (size)
+ val = SETFIELD(NPU2_MEM_BAR_EN | NPU2_MEM_BAR_SEL_MEM, 0ULL, 0b100 + memsel);
+ else
+ val = 0;
+
+ /* Base address - 12 bits, 1G aligned */
+ val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val, GETFIELD(PPC_BITMASK(22, 33), base));
+
+ /* GCID */
+ val = SETFIELD(NPU2_MEM_BAR_GROUP, val, GETFIELD(PPC_BITMASK(15, 18), base));
+ val = SETFIELD(NPU2_MEM_BAR_CHIP, val, GETFIELD(PPC_BITMASK(19, 21), base));
+
+ /*
+ * Other settings
+ *
+ * The size field is the log2 of the size expressed in GB.
+ * NOTE(review): on the disable path size is 0, so this evaluates
+ * ilog2(0) - confirm ilog2() returns something acceptable for 0.
+ */
+ val = SETFIELD(NPU2_MEM_BAR_POISON, val, 1);
+ val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0);
+ val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30));
+ val = SETFIELD(NPU2_MEM_BAR_MODE, val, 0);
+
+ for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
+ reg = NPU2_REG_OFFSET(stack, block, bar_offset);
+ npu2_write(dev->npu, reg, val);
+ }
+
+ /* Set PA config, using the same memsel match and size encoding as the BAR */
+ if (size)
+ val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH, 0ULL, 0b100 + memsel);
+ else
+ val = 0;
+ val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE, val, 0);
+ val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE, val, ilog2(size >> 30));
+ val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MODE, val, 0);
+ val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MASK, val, 0);
+ reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
+ npu2_write(dev->npu, reg, val);
+}
+
+/*
+ * Give the chip's OpenCAPI LPC memory range to dev and program its BAR.
+ *
+ * @dev:  device requesting the allocation
+ * @size: requested size in bytes; raised to at least 1GB and then rounded
+ *        up to a power of two before the BAR is programmed
+ * @bar:  on success, receives the base address of the range
+ *
+ * Returns OPAL_SUCCESS on success, OPAL_RESOURCE if a device on this NPU
+ * already holds the LPC allocation, or OPAL_PARAMETER if size is larger
+ * than the OCAPI_MEM phys-map range.
+ *
+ * dev->npu->lock is held for the whole operation.
+ */
+static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
+{
+	uint64_t phys_map_base, phys_map_size;
+	int rc = OPAL_SUCCESS;
+
+	lock(&dev->npu->lock);
+
+	/*
+	 * Right now, we support 1 allocation per chip, of up to 4TB.
+	 *
+	 * In future, we will use chip address extension to support
+	 * >4TB ranges, and we will implement a more sophisticated
+	 * allocator to allow an allocation for every link on a chip.
+	 */
+
+	if (dev->npu->lpc_mem_allocated) {
+		rc = OPAL_RESOURCE;
+		goto out;
+	}
+
+	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
+
+	if (size > phys_map_size) {
+		/**
+		 * @fwts-label OCAPIInvalidLPCMemoryBARSize
+		 * @fwts-advice The operating system requested an unsupported
+		 * amount of OpenCAPI LPC memory. This is possibly a kernel
+		 * bug, or you may need to upgrade your firmware.
+		 */
+		OCAPIERR(dev, "Invalid LPC memory BAR allocation size requested: 0x%llx bytes (limit 0x%llx)\n",
+			 size, phys_map_size);
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	/* Minimum BAR size is 1 GB */
+	if (size < (1ull << 30)) {
+		size = 1ull << 30;
+	}
+
+	/*
+	 * Round up to the next power of two. The shift has to be done on a
+	 * 64-bit operand: with a plain int "1", any shift count of 31 or
+	 * more overflows, and every non-power-of-two size above 1GB needs
+	 * a count of at least 31.
+	 */
+	if (!is_pow2(size)) {
+		size = 1ull << (ilog2(size) + 1);
+	}
+
+	set_mem_bar(dev, phys_map_base, size);
+	*bar = phys_map_base;
+	/* Record the owner so only this device can release the range */
+	dev->npu->lpc_mem_allocated = dev;
+
+out:
+	unlock(&dev->npu->lock);
+	return rc;
+}
+
+/*
+ * Disable the LPC memory BAR held by dev and drop the ownership record.
+ *
+ * Returns OPAL_PARAMETER if dev is not the device recorded as holding
+ * the NPU's LPC allocation, OPAL_SUCCESS otherwise. The ownership check
+ * and the teardown both happen under dev->npu->lock.
+ */
+static int64_t release_mem_bar(struct npu2_dev *dev)
+{
+	int status;
+
+	lock(&dev->npu->lock);
+
+	if (dev->npu->lpc_mem_allocated == dev) {
+		/* Zero base and size tells set_mem_bar() to disable the BAR */
+		set_mem_bar(dev, 0, 0);
+		dev->npu->lpc_mem_allocated = NULL;
+		status = OPAL_SUCCESS;
+	} else {
+		status = OPAL_PARAMETER;
+	}
+
+	unlock(&dev->npu->lock);
+	return status;
+}
+
+/*
+ * OPAL_NPU_MEM_ALLOC handler: validate the arguments, then hand over to
+ * alloc_mem_bar().
+ *
+ * Returns OPAL_PARAMETER if phb_id does not name an OpenCAPI NPU2 PHB,
+ * if no device can be found for it, or if bar fails opal_addr_valid().
+ * Otherwise returns whatever alloc_mem_bar() returns. bdfn is unused.
+ */
+static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
+				  uint64_t size, uint64_t *bar)
+{
+	struct npu2_dev *ocapi_dev;
+	struct phb *target = pci_get_phb(phb_id);
+
+	if (!target)
+		return OPAL_PARAMETER;
+	if (target->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	ocapi_dev = phb_to_npu2_dev_ocapi(target);
+	if (!ocapi_dev || !opal_addr_valid(bar))
+		return OPAL_PARAMETER;
+
+	return alloc_mem_bar(ocapi_dev, size, bar);
+}
+opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
+
+/*
+ * OPAL_NPU_MEM_RELEASE handler: validate the PHB, then hand over to
+ * release_mem_bar().
+ *
+ * Returns OPAL_PARAMETER if phb_id does not name an OpenCAPI NPU2 PHB or
+ * no device can be found for it. Otherwise returns whatever
+ * release_mem_bar() returns. bdfn is unused.
+ */
+static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn)
+{
+	struct npu2_dev *ocapi_dev;
+	struct phb *target = pci_get_phb(phb_id);
+
+	if (!target)
+		return OPAL_PARAMETER;
+	if (target->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	ocapi_dev = phb_to_npu2_dev_ocapi(target);
+	if (!ocapi_dev)
+		return OPAL_PARAMETER;
+
+	return release_mem_bar(ocapi_dev);
+}
+opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
diff --git a/hw/phys-map.c b/hw/phys-map.c
index fe949e4..7583629 100644
--- a/hw/phys-map.c
+++ b/hw/phys-map.c
@@ -52,6 +52,17 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
{ GPU_MEM_4T_UP, 2, 0x0000044000000000ull, 0x0000002000000000ull },
{ GPU_MEM_4T_UP, 3, 0x0000046000000000ull, 0x0000002000000000ull },
+ /*
+ * OpenCAPI LPC Memory - single 4TB range per chip, fills
+ * whole second non-mirrored region.
+ *
+ * Longer term, we're going to use chip address extension to
+ * enable >4TB to be allocated per chip. At that point, we
+ * may have to find another way of assigning these ranges
+ * outside of phys-map.
+ */
+ { OCAPI_MEM, 0, 0x0002000000000000ull, 0x0000040000000000ull },
+
/* 0 TB offset @ MMIO 0x0006000000000000ull */
{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
{ PHB4_64BIT_MMIO, 1, 0x0006004000000000ull, 0x0000004000000000ull },
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 61e8ea8..3cb587a 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -239,6 +239,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_CTL_STATUS 0x090
#define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49)
#define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51)
+#define NPU2_CQ_CTL_MISC_PA0_CONFIG 0x0A0 /* or should that be CS */
+#define NPU2_CQ_CTL_MISC_PA1_CONFIG 0x0A8 /* or should that be CS */
+#define NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH PPC_BITMASK(0,2)
+#define NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE PPC_BIT(3)
+#define NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE PPC_BITMASK(4,7)
+#define NPU2_CQ_CTL_MISC_PA_CONFIG_MODE PPC_BITMASK(8,11)
+#define NPU2_CQ_CTL_MISC_PA_CONFIG_MASK PPC_BITMASK(13,19)
#define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0
#define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8
#define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0
diff --git a/include/npu2.h b/include/npu2.h
index d58aab4..5b2a436 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -180,8 +180,10 @@ struct npu2 {
uint64_t tve_cache[16];
bool tx_zcal_complete[2];
- /* Used to protect global MMIO space, in particular the XTS
- * tables. */
+ /*
+ * Used to protect global MMIO space, in particular the XTS
+ * tables, and LPC allocation
+ */
struct lock lock;
/* NVLink */
@@ -193,6 +195,11 @@ struct npu2 {
struct lock i2c_lock;
uint8_t i2c_pin_mode;
uint8_t i2c_pin_wr_state;
+ /*
+ * Which device currently has an LPC allocation.
+ * Temporary as long as we only support 1 LPC alloc per chip.
+ */
+ struct npu2_dev *lpc_mem_allocated;
};
static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
diff --git a/include/opal-api.h b/include/opal-api.h
index e461c9d..b0ad435 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -227,8 +227,8 @@
#define OPAL_NPU_SET_RELAXED_ORDER 168
#define OPAL_NPU_GET_RELAXED_ORDER 169
#define OPAL_XIVE_GET_VP_STATE 170 /* Get NVT state */
-#define OPAL_NPU_RESERVED1 171 /* LPC Allocate */
-#define OPAL_NPU_RESERVED2 172 /* LPC Release */
+#define OPAL_NPU_MEM_ALLOC 171
+#define OPAL_NPU_MEM_RELEASE 172
#define OPAL_LAST 172
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
diff --git a/include/phys-map.h b/include/phys-map.h
index 73adda0..0cf48b6 100644
--- a/include/phys-map.h
+++ b/include/phys-map.h
@@ -28,6 +28,7 @@ enum phys_map_type {
SYSTEM_MEM,
GPU_MEM_4T_DOWN,
GPU_MEM_4T_UP,
+ OCAPI_MEM,
PHB4_64BIT_MMIO,
PHB4_32BIT_MMIO,
PHB4_XIVE_ESB,