diff options
-rw-r--r-- | doc/opal-api/opal-npu2-opencapi-159-160-161.rst | 126 | ||||
-rw-r--r-- | hw/npu2-opencapi.c | 206 | ||||
-rw-r--r-- | include/npu2-regs.h | 4 | ||||
-rw-r--r-- | include/opal-api.h | 6 |
4 files changed, 339 insertions, 3 deletions
diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
new file mode 100644
index 0000000..4db3d3e
--- /dev/null
+++ b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
@@ -0,0 +1,126 @@
+.. _OPAL_NPU_SPA_SETUP:
+
+OPAL_NPU_SPA_SETUP
+==================
+
+OpenCAPI devices only.
+
+Sets up a Shared Process Area (SPA) with the Shared Process Area
+Pointer (SPAP) set to the provided address `addr`, and sets the OTL PE
+mask (used for PASID to PE handle conversion) to `PE_mask`.
+
+If `addr` is NULL, the SPA will be disabled. `addr` must be 4K aligned.
+
+Parameters
+----------
+::
+
+	uint64_t phb_id
+	uint32_t bdfn
+	uint64_t addr
+	uint64_t PE_mask
+
+``phb_id``
+  OPAL ID of PHB
+
+``bdfn``
+  Bus-Device-Function number of OpenCAPI AFU
+
+``addr``
+  Address of Shared Process Area, or NULL to disable SPA. Must be 4K aligned.
+
+``PE_mask``
+  Process Element mask for PASID to PE handle conversion (valid range: 0 to 15)
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+  SPAP and PE mask were successfully set
+
+OPAL_PARAMETER
+  A provided parameter was invalid
+
+OPAL_BUSY
+  SPA is already enabled (or if `addr` is NULL, SPA is already disabled)
+
+.. _OPAL_NPU_SPA_CLEAR_CACHE:
+
+OPAL_NPU_SPA_CLEAR_CACHE
+========================
+
+OpenCAPI devices only.
+
+Invalidates the Process Element with the given `PE_handle` from the NPU's SPA cache.
+
+Parameters
+----------
+::
+
+	uint64_t phb_id
+	uint32_t bdfn
+	uint64_t PE_handle
+
+``phb_id``
+  OPAL ID of PHB
+
+``bdfn``
+  Bus-Device-Function number of OpenCAPI AFU
+
+``PE_handle``
+  Handle of Process Element being cleared from SPA cache
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+  PE was successfully cleared from SPA cache
+
+OPAL_PARAMETER
+  A provided parameter was invalid
+
+OPAL_BUSY
+  XSLO is currently invalidating a previously requested entry
+
+.. _OPAL_NPU_TL_SET:
+
+OPAL_NPU_TL_SET
+===============
+
+OpenCAPI devices only.
+
+Update the NPU OTL configuration with device capabilities.
+
+Parameters
+----------
+::
+
+	uint64_t phb_id
+	uint32_t bdfn
+	long capabilities
+	uint64_t rate_phys
+	int rate_sz
+
+``phb_id``
+  OPAL ID of PHB
+
+``bdfn``
+  Bus-Device-Function number of OpenCAPI AFU
+
+``capabilities``
+  Bitmap of TL templates the device can receive
+
+``rate_phys``
+  Physical address of rates buffer
+
+``rate_sz``
+  Size of rates buffer (must be equal to 32)
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+  OTL configuration was successfully updated
+
+OPAL_PARAMETER
+  A provided parameter was invalid, or template 0 is not set in `capabilities`
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 1df769d..d8c2714 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -54,6 +54,9 @@
 
 #define NPU_IRQ_LEVELS		35
 #define NPU_IRQ_LEVELS_XSL	23
+#define MAX_PE_HANDLE		((1 << 15) - 1)
+#define TL_MAX_TEMPLATE		63
+#define TL_RATE_BUF_SIZE	32
 
 enum npu2_link_training_state {
 	NPU2_TRAIN_DEFAULT, /* fully train the link */
@@ -1500,3 +1503,206 @@ static const struct phb_ops npu2_opencapi_ops = {
 	.set_capp_recovery = NULL,
 	.tce_kill = NULL,
 };
+
+static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn,
+				uint64_t addr, uint64_t PE_mask)
+{
+	uint64_t stack, block, offset, reg;
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2_dev *dev;
+	int rc;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	/* addr must be 4k aligned */
+	if (addr & 0xFFF)
+		return OPAL_PARAMETER;
+
+	if (PE_mask > 15)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+	if (block == NPU2_BLOCK_OTL1)
+		offset = NPU2_XSL_PSL_SPAP_A1;
+	else
+		offset = NPU2_XSL_PSL_SPAP_A0;
+
+	lock(&dev->npu->lock);
+	/*
+	 * Set the SPAP used by the device
+	 */
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset),
+			NPU2_MISC_DA_LEN_8B);
+	if ((addr && (reg & NPU2_XSL_PSL_SPAP_EN)) ||
+		(!addr && !(reg & NPU2_XSL_PSL_SPAP_EN))) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+	/* SPA is disabled by passing a NULL address */
+	reg = addr;
+	if (addr)
+		reg = addr | NPU2_XSL_PSL_SPAP_EN;
+
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset),
+			NPU2_MISC_DA_LEN_8B, reg);
+
+	/*
+	 * Set the PE mask that the OS uses for PASID -> PE handle
+	 * conversion
+	 */
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B);
+	reg &= ~NPU2_OTL_CONFIG0_PE_MASK;
+	reg |= (PE_mask << (63-7));
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B,
+			reg);
+	rc = OPAL_SUCCESS;
+out:
+	unlock(&dev->npu->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4);
+
+static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn,
+					uint64_t PE_handle)
+{
+	uint64_t cc_inv, stack, block, reg;
+	uint32_t retries = 5;
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2_dev *dev;
+	int64_t rc;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	if (PE_handle > MAX_PE_HANDLE)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+	cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0);
+
+	lock(&dev->npu->lock);
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
+			NPU2_MISC_DA_LEN_8B);
+	if (reg & PPC_BIT(16)) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	reg = PE_handle | PPC_BIT(15);
+	if (block == NPU2_BLOCK_OTL1)
+		reg |= PPC_BIT(48);
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
+			NPU2_MISC_DA_LEN_8B, reg);
+
+	rc = OPAL_HARDWARE;
+	while (retries--) {
+		reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+				     cc_inv,
+				     NPU2_MISC_DA_LEN_8B);
+		if (!(reg & PPC_BIT(16))) {
+			rc = OPAL_SUCCESS;
+			break;
+		}
+		/* the bit is expected to clear in less than 200us */
+		time_wait_us(200);
+	}
+out:
+	unlock(&dev->npu->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3);
+
+static int get_template_rate(unsigned int templ, char *rate_buf)
+{
+	int shift, idx, val;
+
+	/*
+	 * Each rate is encoded over 4 bits (0->15), with 15 being the
+	 * slowest. The buffer is a succession of rates for all the
+	 * templates. The first 4 bits are for template 63, followed
+	 * by 4 bits for template 62, ... etc. So the rate for
+	 * template 0 is at the very end of the buffer.
+	 */
+	idx = (TL_MAX_TEMPLATE - templ) / 2;
+	shift = 4 * (1 - ((TL_MAX_TEMPLATE - templ) % 2));
+	val = ((unsigned char)rate_buf[idx] >> shift) & 0xF;
+	return val;
+}
+
+static bool is_template_supported(unsigned int templ, long capabilities)
+{
+	return !!(capabilities & (1ull << templ));
+}
+
+static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
+			long capabilities, uint64_t rate_phys, int rate_sz)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2_dev *dev;
+	uint64_t stack, block, reg, templ_rate;
+	int i, rate_pos;
+	char *rate = (char *) rate_phys;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+	if (!opal_addr_valid(rate) || rate_sz != TL_RATE_BUF_SIZE)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+	/*
+	 * The 'capabilities' argument defines what TL template the
+	 * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so
+	 * that's one bit per template.
+	 *
+	 * For each template, the device processing time may vary, so
+	 * the device advertises at what rate a message of a given
+	 * template can be sent. That's encoded in the 'rate' buffer.
+	 *
+	 * On P9, NPU only knows about TL templates 0 -> 3.
+	 * Per the spec, template 0 must be supported.
+	 */
+	if (!is_template_supported(0, capabilities))
+		return OPAL_PARAMETER;
+
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_OTL_CONFIG1(stack, block),
+			NPU2_MISC_DA_LEN_8B);
+	reg &= ~(NPU2_OTL_CONFIG1_TX_TEMP1_EN | NPU2_OTL_CONFIG1_TX_TEMP2_EN |
+		NPU2_OTL_CONFIG1_TX_TEMP3_EN);
+	for (i = 0; i < 4; i++) {
+		/* Skip template 0 as it is implicitly enabled */
+		if (i && is_template_supported(i, capabilities))
+			reg |= PPC_BIT(i);
+		/* The tx rate should still be set for template 0 */
+		templ_rate = get_template_rate(i, rate);
+		rate_pos = 8 + i * 4;
+		reg = SETFIELD(PPC_BITMASK(rate_pos, rate_pos + 3), reg,
+			templ_rate);
+	}
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_OTL_CONFIG1(stack, block), NPU2_MISC_DA_LEN_8B,
+			reg);
+	prlog(PR_DEBUG, "OCAPI: Link %llx:%x, TL conf1 register set to %llx\n",
+		phb_id, bdfn, reg);
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index faaf5a1..db6e279 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -327,6 +327,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_NTL_DL_CLK_CTRL(ndev)		NPU2_DL_REG_OFFSET(ndev, 0x001C)
 
 /* OpenCAPI - XSL registers */
+#define NPU2_XSL_PSL_LLCMD_A0			0x008
 #define NPU2_XSL_PSL_SCNTL_A0			0x010
 #define   NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL	PPC_BIT(0)
 #define NPU2_XSL_DEF				0x040
@@ -334,6 +335,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define   NPU2_XSL_GP_BLOOM_FILTER_ENABLE	PPC_BIT(16)
 #define NPU2_XSL_WRAP_CFG			0x0C0
 #define   NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE	PPC_BIT(0)
+#define NPU2_XSL_PSL_SPAP_A0			0
+#define NPU2_XSL_PSL_SPAP_A1			0x18
+#define   NPU2_XSL_PSL_SPAP_EN			PPC_BIT(63)
 
 /* OpenCAPI - OTL registers */
 #define NPU2_OTL_CONFIG0(stack, block)		NPU2_REG_OFFSET(stack, block, 0x000)
diff --git a/include/opal-api.h b/include/opal-api.h
index bb18a8b..57434d7 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -215,9 +215,9 @@
 #define OPAL_SENSOR_GROUP_CLEAR			156
 #define OPAL_PCI_SET_P2P			157
 #define OPAL_QUIESCE				158
-#define OPAL_RESERVED_OPENCAPI_GRUMPY_AND	159
-#define OPAL_RESERVED_OPENCAPI_DISGRUNTLED	160
-#define OPAL_RESERVED_OPENCAPI_MAINTAINER	161
+#define OPAL_NPU_SPA_SETUP			159
+#define OPAL_NPU_SPA_CLEAR_CACHE		160
+#define OPAL_NPU_TL_SET				161
 #define OPAL_SENSOR_READ_U64			162
 #define OPAL_SENSOR_GROUP_ENABLE		163
 #define OPAL_LAST				163