diff options
-rw-r--r-- | doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst (renamed from doc/opal-api/opal-npu2-opencapi-159-160-161.rst) | 77 | ||||
-rw-r--r-- | hw/npu2-opencapi.c | 181 | ||||
-rw-r--r-- | hw/phys-map.c | 11 | ||||
-rw-r--r-- | include/npu2-regs.h | 7 | ||||
-rw-r--r-- | include/npu2.h | 11 | ||||
-rw-r--r-- | include/opal-api.h | 4 | ||||
-rw-r--r-- | include/phys-map.h | 1 |
7 files changed, 288 insertions, 4 deletions
diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst index 4db3d3e..6d603f1 100644 --- a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst +++ b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst @@ -124,3 +124,80 @@ OPAL_SUCCESS OPAL_PARAMETER A provided parameter was invalid + + .. _OPAL_NPU_MEM_ALLOC: + +OPAL_NPU_MEM_ALLOC +================== + +OpenCAPI devices only. + +Sets up the NPU memory BAR for Lowest Point of Coherency (LPC) memory. + +At present, only one device per CPU can use LPC memory, and a maximum of 4TB +can be allocated. + +Parameters +---------- +:: + + uint64_t phb_id + uint32_t bdfn + uint64_t size + uint64_t *bar + +``phb_id`` + OPAL ID of PHB + +``bdfn`` + Bus-Device-Function number of OpenCAPI AFU + +``size`` + Size of requested LPC memory area in bytes + +``bar`` + Pointer to variable where base of LPC memory area will be returned + +Return Values +------------- + +OPAL_SUCCESS + BAR setup completed successfully + +OPAL_PARAMETER + A provided parameter was invalid + +OPAL_RESOURCE + The BAR could not be assigned due to limitations + +.. _OPAL_NPU_MEM_RELEASE: + +OPAL_NPU_MEM_RELEASE +==================== + +OpenCAPI devices only. + +Releases NPU memory BAR. 
+ +Parameters +---------- +:: + + uint64_t phb_id + uint32_t bdfn + +``phb_id`` + OPAL ID of PHB + +``bdfn`` + Bus-Device-Function number of OpenCAPI AFU + +Return Values +------------- + +OPAL_SUCCESS + BAR released successfully + +OPAL_PARAMETER + A provided parameter was invalid, or the specified device does not currently + have LPC memory assigned diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index 27dfc12..7a90cfa 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -2025,3 +2025,184 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn, return OPAL_SUCCESS; } opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5); + +static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size) +{ + uint64_t stack, val, reg, bar_offset, pa_config_offset; + uint8_t memsel; + + stack = index_to_stack(dev->brick_index); + switch (dev->brick_index) { + case 2: + case 4: + bar_offset = NPU2_GPU0_MEM_BAR; + pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG; + break; + case 3: + case 5: + bar_offset = NPU2_GPU1_MEM_BAR; + pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG; + break; + default: + assert(false); + } + + assert((!size && !base) || (size && base)); + + /* + * Memory select configuration: + * - 0b000 - BAR disabled + * - 0b001 - match 0b00, 0b01 + * - 0b010 - match 0b01, 0b10 + * - 0b011 - match 0b00, 0b10 + * - 0b100 - match 0b00 + * - 0b101 - match 0b01 + * - 0b110 - match 0b10 + * - 0b111 - match 0b00, 0b01, 0b10 + */ + memsel = GETFIELD(PPC_BITMASK(13, 14), base); + if (size) + val = SETFIELD(NPU2_MEM_BAR_EN | NPU2_MEM_BAR_SEL_MEM, 0ULL, 0b100 + memsel); + else + val = 0; + + /* Base address - 12 bits, 1G aligned */ + val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val, GETFIELD(PPC_BITMASK(22, 33), base)); + + /* GCID */ + val = SETFIELD(NPU2_MEM_BAR_GROUP, val, GETFIELD(PPC_BITMASK(15, 18), base)); + val = SETFIELD(NPU2_MEM_BAR_CHIP, val, GETFIELD(PPC_BITMASK(19, 21), base)); + + /* Other settings */ + val = 
SETFIELD(NPU2_MEM_BAR_POISON, val, 1); + val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0); + val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30)); + val = SETFIELD(NPU2_MEM_BAR_MODE, val, 0); + + for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { + reg = NPU2_REG_OFFSET(stack, block, bar_offset); + npu2_write(dev->npu, reg, val); + } + + /* Set PA config */ + if (size) + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH, 0ULL, 0b100 + memsel); + else + val = 0; + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE, val, 0); + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE, val, ilog2(size >> 30)); + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MODE, val, 0); + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MASK, val, 0); + reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset); + npu2_write(dev->npu, reg, val); +} + +static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar) +{ + uint64_t phys_map_base, phys_map_size; + int rc = OPAL_SUCCESS; + + lock(&dev->npu->lock); + + /* + * Right now, we support 1 allocation per chip, of up to 4TB. + * + * In future, we will use chip address extension to support + * >4TB ranges, and we will implement a more sophisticated + * allocator to allow an allocation for every link on a chip. + */ + + if (dev->npu->lpc_mem_allocated) { + rc = OPAL_RESOURCE; + goto out; + } + + phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size); + + if (size > phys_map_size) { + /** + * @fwts-label OCAPIInvalidLPCMemoryBARSize + * @fwts-advice The operating system requested an unsupported + * amount of OpenCAPI LPC memory. This is possibly a kernel + * bug, or you may need to upgrade your firmware. 
+ */ + OCAPIERR(dev, "Invalid LPC memory BAR allocation size requested: 0x%llx bytes (limit 0x%llx)\n", + size, phys_map_size); + rc = OPAL_PARAMETER; + goto out; + } + + /* Minimum BAR size is 1 GB; BAR sizes are powers of two. Use 64-bit (ull) constants so shifts of 31 or more cannot overflow int */ + if (size < (1ull << 30)) { + size = 1ull << 30; + } + + if (!is_pow2(size)) { + size = 1ull << (ilog2(size) + 1); + } + + set_mem_bar(dev, phys_map_base, size); + *bar = phys_map_base; + dev->npu->lpc_mem_allocated = dev; + +out: + unlock(&dev->npu->lock); + return rc; +} + +static int64_t release_mem_bar(struct npu2_dev *dev) +{ + int rc = OPAL_SUCCESS; + + lock(&dev->npu->lock); + + if (dev->npu->lpc_mem_allocated != dev) { + rc = OPAL_PARAMETER; + goto out; + } + + set_mem_bar(dev, 0, 0); + dev->npu->lpc_mem_allocated = NULL; + +out: + unlock(&dev->npu->lock); + return rc; +} + +static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn, + uint64_t size, uint64_t *bar) +{ + struct phb *phb = pci_get_phb(phb_id); + struct npu2_dev *dev; + + + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) + return OPAL_PARAMETER; + + dev = phb_to_npu2_dev_ocapi(phb); + if (!dev) + return OPAL_PARAMETER; + + if (!opal_addr_valid(bar)) + return OPAL_PARAMETER; + + return alloc_mem_bar(dev, size, bar); +} +opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4); + +static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn) +{ + struct phb *phb = pci_get_phb(phb_id); + struct npu2_dev *dev; + + + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) + return OPAL_PARAMETER; + + dev = phb_to_npu2_dev_ocapi(phb); + if (!dev) + return OPAL_PARAMETER; + + return release_mem_bar(dev); +} +opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2); diff --git a/hw/phys-map.c b/hw/phys-map.c index fe949e4..7583629 100644 --- a/hw/phys-map.c +++ b/hw/phys-map.c @@ -52,6 +52,17 @@ static const struct phys_map_entry phys_map_table_nimbus[] = { { GPU_MEM_4T_UP, 2, 0x0000044000000000ull, 0x0000002000000000ull }, { GPU_MEM_4T_UP, 3, 0x0000046000000000ull, 
0x0000002000000000ull }, + /* + * OpenCAPI LPC Memory - single 4TB range per chip, fills + * whole second non-mirrored region. + * + * Longer term, we're going to use chip address extension to + * enable >4TB to be allocated per chip. At that point, we + * may have to find another way of assigning these ranges + * outside of phys-map. + */ + { OCAPI_MEM, 0, 0x0002000000000000ull, 0x0000040000000000ull }, + /* 0 TB offset @ MMIO 0x0006000000000000ull */ { PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull }, { PHB4_64BIT_MMIO, 1, 0x0006004000000000ull, 0x0000004000000000ull }, diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 61e8ea8..3cb587a 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -239,6 +239,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_CTL_STATUS 0x090 #define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49) #define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51) +#define NPU2_CQ_CTL_MISC_PA0_CONFIG 0x0A0 /* or should that be CS */ +#define NPU2_CQ_CTL_MISC_PA1_CONFIG 0x0A8 /* or should that be CS */ +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH PPC_BITMASK(0,2) +#define NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE PPC_BIT(3) +#define NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE PPC_BITMASK(4,7) +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MODE PPC_BITMASK(8,11) +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MASK PPC_BITMASK(13,19) #define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0 #define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8 #define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0 diff --git a/include/npu2.h b/include/npu2.h index d58aab4..5b2a436 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -180,8 +180,10 @@ struct npu2 { uint64_t tve_cache[16]; bool tx_zcal_complete[2]; - /* Used to protect global MMIO space, in particular the XTS - * tables. 
*/ + /* + * Used to protect global MMIO space, in particular the XTS + * tables, and LPC allocation + */ struct lock lock; /* NVLink */ @@ -193,6 +195,11 @@ struct npu2 { struct lock i2c_lock; uint8_t i2c_pin_mode; uint8_t i2c_pin_wr_state; + /* + * Which device currently has an LPC allocation. + * Temporary as long as we only support 1 LPC alloc per chip. + */ + struct npu2_dev *lpc_mem_allocated; }; static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb) diff --git a/include/opal-api.h b/include/opal-api.h index e461c9d..b0ad435 100644 --- a/include/opal-api.h +++ b/include/opal-api.h @@ -227,8 +227,8 @@ #define OPAL_NPU_SET_RELAXED_ORDER 168 #define OPAL_NPU_GET_RELAXED_ORDER 169 #define OPAL_XIVE_GET_VP_STATE 170 /* Get NVT state */ -#define OPAL_NPU_RESERVED1 171 /* LPC Allocate */ -#define OPAL_NPU_RESERVED2 172 /* LPC Release */ +#define OPAL_NPU_MEM_ALLOC 171 +#define OPAL_NPU_MEM_RELEASE 172 #define OPAL_LAST 172 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ diff --git a/include/phys-map.h b/include/phys-map.h index 73adda0..0cf48b6 100644 --- a/include/phys-map.h +++ b/include/phys-map.h @@ -28,6 +28,7 @@ enum phys_map_type { SYSTEM_MEM, GPU_MEM_4T_DOWN, GPU_MEM_4T_UP, + OCAPI_MEM, PHB4_64BIT_MMIO, PHB4_32BIT_MMIO, PHB4_XIVE_ESB, |