aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/opal-api/opal-npu2-opencapi-159-160-161.rst126
-rw-r--r--hw/npu2-opencapi.c206
-rw-r--r--include/npu2-regs.h4
-rw-r--r--include/opal-api.h6
4 files changed, 339 insertions, 3 deletions
diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
new file mode 100644
index 0000000..4db3d3e
--- /dev/null
+++ b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
@@ -0,0 +1,126 @@
+.. _OPAL_NPU_SPA_SETUP:
+
+OPAL_NPU_SPA_SETUP
+==================
+
+OpenCAPI devices only.
+
+Sets up a Shared Process Area (SPA) with the Shared Process Area
+Pointer (SPAP) set to the provided address `addr`, and sets the OTL PE
+mask (used for PASID to PE handle conversion) to `PE_mask`.
+
+If `addr` is NULL, the SPA will be disabled. `addr` must be 4K aligned.
+
+Parameters
+----------
+::
+
+ uint64_t phb_id
+ uint32_t bdfn
+ uint64_t addr
+ uint64_t PE_mask
+
+``phb_id``
+ OPAL ID of PHB
+
+``bdfn``
+ Bus-Device-Function number of OpenCAPI AFU
+
+``addr``
+ Address of Shared Process Area, or NULL to disable SPA. Must be 4K aligned.
+
+``PE_mask``
+ Process Element mask for PASID to PE handle conversion. Must be in the range 0 to 15.
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+ SPAP and PE mask were successfully set
+
+OPAL_PARAMETER
+ A provided parameter was invalid
+
+OPAL_BUSY
+ SPA is already enabled (or if addr is NULL, SPA is already disabled)
+
+.. _OPAL_NPU_SPA_CLEAR_CACHE:
+
+OPAL_NPU_SPA_CLEAR_CACHE
+========================
+
+OpenCAPI devices only.
+
+Invalidates the Process Element with the given `PE_handle` from the NPU's SPA cache.
+
+Parameters
+----------
+::
+
+ uint64_t phb_id
+ uint32_t bdfn
+ uint64_t PE_handle
+
+``phb_id``
+ OPAL ID of PHB
+
+``bdfn``
+ Bus-Device-Function number of OpenCAPI AFU
+
+``PE_handle``
+ Handle of Process Element being cleared from SPA cache
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+ PE was successfully cleared from SPA cache
+
+OPAL_PARAMETER
+ A provided parameter was invalid
+
+OPAL_BUSY
+ XSLO is currently invalidating a previously requested entry
+
+OPAL_HARDWARE
+ The invalidation request did not complete within the expected time
+
+.. _OPAL_NPU_TL_SET:
+
+OPAL_NPU_TL_SET
+===============
+
+OpenCAPI devices only.
+
+Update the NPU OTL configuration with device capabilities.
+
+Parameters
+----------
+::
+
+ uint64_t phb_id
+ uint32_t bdfn
+ long capabilities
+ uint64_t rate_phys
+ int rate_sz
+
+``phb_id``
+ OPAL ID of PHB
+
+``bdfn``
+ Bus-Device-Function number of OpenCAPI AFU
+
+``capabilities``
+ Bitmap of TL templates the device can receive
+
+``rate_phys``
+ Physical address of rates buffer
+
+``rate_sz``
+ Size of rates buffer (must be equal to 32)
+
+Return Values
+-------------
+
+OPAL_SUCCESS
+ OTL configuration was successfully updated
+
+OPAL_PARAMETER
+ A provided parameter was invalid
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 1df769d..d8c2714 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -54,6 +54,9 @@
#define NPU_IRQ_LEVELS 35
#define NPU_IRQ_LEVELS_XSL 23
+#define MAX_PE_HANDLE ((1 << 15) - 1) /* largest PE handle accepted by opal_npu_spa_clear_cache() */
+#define TL_MAX_TEMPLATE 63 /* highest TL template number; its rate is the first one in the rate buffer */
+#define TL_RATE_BUF_SIZE 32 /* required size of the TL rate buffer: 64 templates x 4 bits each */
enum npu2_link_training_state {
NPU2_TRAIN_DEFAULT, /* fully train the link */
@@ -1500,3 +1503,206 @@ static const struct phb_ops npu2_opencapi_ops = {
.set_capp_recovery = NULL,
.tce_kill = NULL,
};
+
+/*
+ * OPAL_NPU_SPA_SETUP: enable or disable the Shared Process Area (SPA)
+ * of an OpenCAPI link and program the PE mask of its OTL.
+ *
+ * phb_id  - OPAL ID of the OpenCAPI PHB
+ * bdfn    - unused
+ * addr    - 4K-aligned address of the SPA; 0 disables the SPA
+ * PE_mask - value for the PE mask field of OTL config register 0,
+ *           at most 15
+ *
+ * Returns OPAL_SUCCESS once both registers have been written,
+ * OPAL_PARAMETER when an argument is rejected, or OPAL_BUSY when the
+ * SPA is already in the requested state (enabled for a non-zero addr,
+ * disabled for a zero addr).
+ */
+static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn,
+				   uint64_t addr, uint64_t PE_mask)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	uint64_t stack, block, spap_reg, otl_cfg0, val;
+	struct npu2_dev *dev;
+	int rc;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	/* the SPA must be 4K aligned and the PE mask fits in 4 bits */
+	if ((addr & 0xFFF) || PE_mask > 15)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+
+	/* each OTL block of the stack has its own SPAP register in the XSL */
+	spap_reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
+				   (block == NPU2_BLOCK_OTL1) ?
+				   NPU2_XSL_PSL_SPAP_A1 : NPU2_XSL_PSL_SPAP_A0);
+	otl_cfg0 = NPU2_OTL_CONFIG0(stack, block);
+
+	lock(&dev->npu->lock);
+
+	val = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+			     spap_reg, NPU2_MISC_DA_LEN_8B);
+	if (!addr == !(val & NPU2_XSL_PSL_SPAP_EN)) {
+		/* enabling an enabled SPA, or disabling a disabled one */
+		rc = OPAL_BUSY;
+	} else {
+		/* a NULL address clears the register, which disables the SPA */
+		val = addr ? (addr | NPU2_XSL_PSL_SPAP_EN) : 0;
+		npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+				spap_reg, NPU2_MISC_DA_LEN_8B, val);
+
+		/*
+		 * program the PE mask that the OS uses for
+		 * PASID -> PE handle conversion
+		 */
+		val = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+				     otl_cfg0, NPU2_MISC_DA_LEN_8B);
+		val = (val & ~NPU2_OTL_CONFIG0_PE_MASK) | (PE_mask << (63-7));
+		npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+				otl_cfg0, NPU2_MISC_DA_LEN_8B, val);
+		rc = OPAL_SUCCESS;
+	}
+
+	unlock(&dev->npu->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4);
+
+/*
+ * OPAL_NPU_SPA_CLEAR_CACHE: request the invalidation of one Process
+ * Element from the NPU's SPA cache and wait for it to complete.
+ *
+ * phb_id    - OPAL ID of the OpenCAPI PHB
+ * bdfn      - unused
+ * PE_handle - handle of the Process Element to invalidate; must not
+ *             exceed MAX_PE_HANDLE
+ *
+ * Returns OPAL_SUCCESS when the invalidation has completed,
+ * OPAL_PARAMETER when an argument is rejected, OPAL_BUSY when a
+ * previously requested invalidation is still in progress, or
+ * OPAL_HARDWARE when the request is still pending after the last poll.
+ */
+static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn,
+					 uint64_t PE_handle)
+{
+	uint64_t cc_inv, stack, block, reg;
+	/*
+	 * Same type as the return value, so that OPAL status codes are
+	 * not round-tripped through an unsigned variable.
+	 */
+	int64_t rc;
+	uint32_t retries = 5;
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2_dev *dev;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+
+	if (PE_handle > MAX_PE_HANDLE)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+	cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0);
+
+	lock(&dev->npu->lock);
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
+			     NPU2_MISC_DA_LEN_8B);
+	/* bit 16 still set: an earlier request has not completed yet */
+	if (reg & PPC_BIT(16)) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	/* build the command: PE handle plus bit 15, and bit 48 for OTL1 */
+	reg = PE_handle | PPC_BIT(15);
+	if (block == NPU2_BLOCK_OTL1)
+		reg |= PPC_BIT(48);
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
+			NPU2_MISC_DA_LEN_8B, reg);
+
+	/* poll until bit 16 clears; report a hardware error otherwise */
+	rc = OPAL_HARDWARE;
+	while (retries--) {
+		reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+				     cc_inv, NPU2_MISC_DA_LEN_8B);
+		if (!(reg & PPC_BIT(16))) {
+			rc = OPAL_SUCCESS;
+			break;
+		}
+		/* the bit expected to flip in less than 200us */
+		time_wait_us(200);
+	}
+out:
+	unlock(&dev->npu->lock);
+	return rc;
+}
+opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3);
+
+/*
+ * Extract the rate of TL template 'templ' from the rate buffer.
+ *
+ * Each rate is encoded over 4 bits (0->15), with 15 being the
+ * slowest. The buffer is a succession of rates for all the
+ * templates. The first 4 bits are for template 63, followed
+ * by 4 bits for template 62, ... etc. So the rate for
+ * template 0 is at the very end of the buffer.
+ *
+ * templ must not exceed TL_MAX_TEMPLATE. The returned value is always
+ * in the range 0->15.
+ */
+static int get_template_rate(unsigned int templ, char *rate_buf)
+{
+	/* position of the template's nibble, counted from the buffer start */
+	unsigned int pos = TL_MAX_TEMPLATE - templ;
+	/* read as unsigned so that a byte >= 0x80 is never sign-extended */
+	unsigned char byte = (unsigned char)rate_buf[pos / 2];
+
+	/* even positions sit in the high nibble, odd ones in the low nibble */
+	if (pos % 2)
+		return byte & 0xF;
+	return byte >> 4;
+}
+
+/*
+ * Tell whether bit 'templ' is set in the 'capabilities' bitmap, i.e.
+ * whether the device can receive TL template 'templ'.
+ */
+static bool is_template_supported(unsigned int templ, long capabilities)
+{
+	const unsigned long long templ_bit = 1ull << templ;
+
+	if (capabilities & templ_bit)
+		return true;
+	return false;
+}
+
+/*
+ * OPAL_NPU_TL_SET: update OTL config register 1 of an OpenCAPI link
+ * with the TL templates the device can receive and their rates.
+ *
+ * phb_id       - OPAL ID of the OpenCAPI PHB
+ * bdfn         - only used in the debug trace
+ * capabilities - bitmap of supported TL templates, one bit per template;
+ *                bit 0 must be set
+ * rate_phys    - address of the rate buffer (see get_template_rate())
+ * rate_sz      - size of the rate buffer, must be TL_RATE_BUF_SIZE
+ *
+ * Returns OPAL_SUCCESS once the register has been written, or
+ * OPAL_PARAMETER when an argument is rejected.
+ */
+static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
+			       long capabilities, uint64_t rate_phys, int rate_sz)
+{
+	struct phb *phb = pci_get_phb(phb_id);
+	struct npu2_dev *dev;
+	uint64_t stack, block, reg, templ_rate;
+	int i, rate_pos;
+	char *rate = (char *) rate_phys;
+
+	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
+		return OPAL_PARAMETER;
+	if (!opal_addr_valid(rate) || rate_sz != TL_RATE_BUF_SIZE)
+		return OPAL_PARAMETER;
+
+	dev = phb_to_npu2_dev_ocapi(phb);
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	block = index_to_block(dev->index);
+	stack = index_to_stack(dev->index);
+	/*
+	 * The 'capabilities' argument defines what TL template the
+	 * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so
+	 * that's one bit per template.
+	 *
+	 * For each template, the device processing time may vary, so
+	 * the device advertises at what rate a message of a given
+	 * template can be sent. That's encoded in the 'rate' buffer.
+	 *
+	 * On P9, NPU only knows about TL templates 0 -> 3.
+	 * Per the spec, template 0 must be supported.
+	 */
+	if (!is_template_supported(0, capabilities))
+		return OPAL_PARAMETER;
+
+	/*
+	 * NOTE(review): this read-modify-write is done without taking
+	 * dev->npu->lock, unlike the SPA calls - confirm this is intended.
+	 */
+	reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
+			     NPU2_OTL_CONFIG1(stack, block),
+			     NPU2_MISC_DA_LEN_8B);
+	/*
+	 * Start with templates 1, 2 and 3 all disabled, so that a template
+	 * enabled by an earlier call does not stay enabled when the new
+	 * capabilities no longer include it.
+	 */
+	reg &= ~(NPU2_OTL_CONFIG1_TX_TEMP1_EN | NPU2_OTL_CONFIG1_TX_TEMP2_EN |
+		 NPU2_OTL_CONFIG1_TX_TEMP3_EN);
+	for (i = 0; i < 4; i++) {
+		/* Skip template 0 as it is implicitly enabled */
+		if (i && is_template_supported(i, capabilities))
+			reg |= PPC_BIT(i);
+		/* The tx rate should still be set for template 0 */
+		templ_rate = get_template_rate(i, rate);
+		/* 4-bit rate fields are packed back to back from bit 8 */
+		rate_pos = 8 + i * 4;
+		reg = SETFIELD(PPC_BITMASK(rate_pos, rate_pos + 3), reg,
+			       templ_rate);
+	}
+	npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
+			NPU2_OTL_CONFIG1(stack, block), NPU2_MISC_DA_LEN_8B,
+			reg);
+	prlog(PR_DEBUG, "OCAPI: Link %llx:%x, TL conf1 register set to %llx\n",
+	      phb_id, bdfn, reg);
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index faaf5a1..db6e279 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -327,6 +327,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C)
/* OpenCAPI - XSL registers */
+#define NPU2_XSL_PSL_LLCMD_A0 0x008 /* XSL register used to request the invalidation of a PE from the SPA cache */
#define NPU2_XSL_PSL_SCNTL_A0 0x010
#define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0)
#define NPU2_XSL_DEF 0x040
@@ -334,6 +335,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16)
#define NPU2_XSL_WRAP_CFG 0x0C0
#define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0)
+#define NPU2_XSL_PSL_SPAP_A0 0 /* SPA pointer register offset used unless the link's block is OTL1 */
+#define NPU2_XSL_PSL_SPAP_A1 0x18 /* SPA pointer register offset used when the link's block is OTL1 */
+#define NPU2_XSL_PSL_SPAP_EN PPC_BIT(63) /* set in the SPA pointer register while the SPA is enabled */
/* OpenCAPI - OTL registers */
#define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000)
diff --git a/include/opal-api.h b/include/opal-api.h
index bb18a8b..57434d7 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -215,9 +215,9 @@
#define OPAL_SENSOR_GROUP_CLEAR 156
#define OPAL_PCI_SET_P2P 157
#define OPAL_QUIESCE 158
-#define OPAL_RESERVED_OPENCAPI_GRUMPY_AND 159
-#define OPAL_RESERVED_OPENCAPI_DISGRUNTLED 160
-#define OPAL_RESERVED_OPENCAPI_MAINTAINER 161
+#define OPAL_NPU_SPA_SETUP 159
+#define OPAL_NPU_SPA_CLEAR_CACHE 160
+#define OPAL_NPU_TL_SET 161
#define OPAL_SENSOR_READ_U64 162
#define OPAL_SENSOR_GROUP_ENABLE 163
#define OPAL_LAST 163