aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/npu-opal.c11
-rw-r--r--hw/pau.c154
-rw-r--r--include/pau-regs.h13
-rw-r--r--include/pau.h7
4 files changed, 185 insertions, 0 deletions
diff --git a/hw/npu-opal.c b/hw/npu-opal.c
index 73158b1..cf13690 100644
--- a/hw/npu-opal.c
+++ b/hw/npu-opal.c
@@ -7,6 +7,7 @@
#include <pci.h>
#include <phb4.h>
#include <npu2.h>
+#include <pau.h>
#define TL_RATE_BUF_SIZE 32
@@ -184,6 +185,9 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
if (phb->phb_type == phb_type_npu_v2_opencapi)
return npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
+ if (phb->phb_type == phb_type_pau_opencapi)
+ return pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
+
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4);
@@ -202,6 +206,9 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
if (phb->phb_type == phb_type_npu_v2_opencapi)
return npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
+ if (phb->phb_type == phb_type_pau_opencapi)
+ return pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
+
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3);
@@ -222,6 +229,10 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
return npu2_opencapi_tl_set(phb, bdfn, capabilities,
rate);
+ if (phb->phb_type == phb_type_pau_opencapi)
+ return pau_opencapi_tl_set(phb, bdfn, capabilities,
+ rate);
+
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
diff --git a/hw/pau.c b/hw/pau.c
index 8425567..83cd6fe 100644
--- a/hw/pau.c
+++ b/hw/pau.c
@@ -14,6 +14,9 @@
#define PAU_MAX_PE_NUM 16
#define PAU_RESERVED_PE_NUM 15
+#define PAU_TL_MAX_TEMPLATE 63
+#define PAU_TL_RATE_BUF_SIZE 32
+
#define PAU_SLOT_NORMAL PCI_SLOT_STATE_NORMAL
#define PAU_SLOT_LINK PCI_SLOT_STATE_LINK
#define PAU_SLOT_LINK_START (PAU_SLOT_LINK + 1)
@@ -265,6 +268,157 @@ static void pau_device_detect_fixup(struct pau_dev *dev)
dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
}
+/*
+ * Enable or disable the SPA of the OpenCAPI link behind @phb.
+ *
+ * @phb:     PHB of the OpenCAPI device
+ * @bdfn:    unused
+ * @addr:    SPA address to program; 0 disables the SPA
+ * @PE_mask: mask the OS uses for the PASID -> PE handle conversion,
+ *           written to the PE_MASK field of OTL_MISC_CFG0
+ *
+ * Returns OPAL_PARAMETER if no device is found for @phb, OPAL_BUSY if
+ * the SPA is already in the requested state (enabling while enabled,
+ * or disabling while disabled), OPAL_SUCCESS otherwise.
+ */
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t addr, uint64_t PE_mask)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau;
+	uint64_t reg, val;
+	int64_t rc;
+
+	/* Same guard as pau_opencapi_tl_set(): never dereference a NULL dev */
+	if (!dev)
+		return OPAL_PARAMETER;
+	pau = dev->pau;
+
+	lock(&pau->lock);
+
+	/* Refuse a request that would not change the enable state */
+	reg = PAU_XSL_OSL_SPAP_AN(dev->index);
+	val = pau_read(pau, reg);
+	if ((addr && (val & PAU_XSL_OSL_SPAP_AN_EN)) ||
+	    (!addr && !(val & PAU_XSL_OSL_SPAP_AN_EN))) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	/* SPA is disabled by passing a NULL address */
+	val = addr;
+	if (addr)
+		val = addr | PAU_XSL_OSL_SPAP_AN_EN;
+	pau_write(pau, reg, val);
+
+	/*
+	 * Set the PE mask that the OS uses for PASID -> PE handle
+	 * conversion
+	 */
+	reg = PAU_OTL_MISC_CFG0(dev->index);
+	val = pau_read(pau, reg);
+	val = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, val, PE_mask);
+	pau_write(pau, reg, val);
+	rc = OPAL_SUCCESS;
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+/*
+ * Ask the XSL to remove the cached entry of a PE handle and wait for
+ * the request to complete.
+ *
+ * @phb:       PHB of the OpenCAPI device
+ * @bdfn:      unused
+ * @PE_handle: PE handle to invalidate
+ *
+ * Returns OPAL_PARAMETER if no device is found for @phb, OPAL_BUSY if
+ * a previous invalidation is still pending, OPAL_HARDWARE if the
+ * pending bit is still set after 5 polls, OPAL_SUCCESS otherwise.
+ */
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+				     uint32_t __unused bdfn,
+				     uint64_t PE_handle)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau;
+	uint64_t reg, val;
+	int64_t rc;
+	int retries = 5;
+
+	/* Same guard as pau_opencapi_tl_set(): never dereference a NULL dev */
+	if (!dev)
+		return OPAL_PARAMETER;
+	pau = dev->pau;
+
+	lock(&pau->lock);
+
+	reg = PAU_XSL_OSL_CCINV;
+	val = pau_read(pau, reg);
+	if (val & PAU_XSL_OSL_CCINV_PENDING) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	/*
+	 * SETFIELD() yields the updated value (see its other uses in
+	 * this file), so a plain assignment is enough; OR-ing the
+	 * result back into val was redundant.
+	 */
+	val = PAU_XSL_OSL_CCINV_REMOVE;
+	val = SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, val, PE_handle);
+	/* The brick bit is only set for a non-zero device index */
+	if (dev->index)
+		val |= PAU_XSL_OSL_CCINV_BRICK;
+	pau_write(pau, reg, val);
+
+	rc = OPAL_HARDWARE;
+	while (retries--) {
+		val = pau_read(pau, reg);
+		if (!(val & PAU_XSL_OSL_CCINV_PENDING)) {
+			rc = OPAL_SUCCESS;
+			break;
+		}
+		/* The bit is expected to flip in less than 200us */
+		time_wait_us(200);
+	}
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+/*
+ * Extract the 4-bit rate of TL template @templ from @rate_buf.
+ *
+ * Each rate is encoded over 4 bits (0->15), with 15 being the
+ * slowest. The buffer is a succession of rates for all the
+ * templates. The first 4 bits are for template 63, followed
+ * by 4 bits for template 62, ... etc. So the rate for
+ * template 0 is at the very end of the buffer.
+ *
+ * @templ:    template number, 0 -> PAU_TL_MAX_TEMPLATE
+ * @rate_buf: rate buffer; byte (PAU_TL_MAX_TEMPLATE - templ) / 2 is read
+ *
+ * Returns the rate, always in the range 0 -> 15.
+ */
+static int pau_opencapi_get_templ_rate(unsigned int templ,
+				       char *rate_buf)
+{
+	unsigned int pos = PAU_TL_MAX_TEMPLATE - templ;
+	unsigned int shift;
+	unsigned char byte;
+
+	/*
+	 * Read the byte as unsigned: plain char may be signed, and
+	 * right-shifting a negative value is implementation-defined.
+	 */
+	byte = (unsigned char)rate_buf[pos / 2];
+
+	/* Even positions sit in the high nibble, odd ones in the low nibble */
+	shift = (pos % 2) ? 0 : 4;
+
+	/* Mask so the neighbouring template's nibble never leaks out */
+	return (byte >> shift) & 0xf;
+}
+
+/*
+ * Tell whether bit @templ is set in the @capabilities bitmap, i.e.
+ * whether the device can receive TL template @templ.
+ */
+static bool pau_opencapi_is_templ_supported(unsigned int templ,
+					    long capabilities)
+{
+	const unsigned long long templ_bit = 1ull << templ;
+
+	return (capabilities & templ_bit) != 0;
+}
+
+/*
+ * Program the OTL TX configuration from the TL capabilities and
+ * rates advertised by the device.
+ *
+ * @phb:          PHB of the OpenCAPI device
+ * @bdfn:         unused
+ * @capabilities: bitmap of the TL templates the device can receive,
+ *                one bit per template
+ * @rate_buf:     per-template rates, 4 bits each (see
+ *                pau_opencapi_get_templ_rate())
+ *
+ * Returns OPAL_PARAMETER if no device is found for @phb or if
+ * template 0 is not advertised, OPAL_SUCCESS otherwise.
+ */
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+			    long capabilities, char *rate_buf)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau;
+	uint64_t reg, val, templ_rate;
+	int templ, first_bit;
+
+	if (!dev)
+		return OPAL_PARAMETER;
+	pau = dev->pau;
+
+	/*
+	 * OpenCAPI 5.0 defines 64 TL templates, hence one bit per
+	 * template in 'capabilities'. The device processing time may
+	 * differ from one template to the next, so the device also
+	 * advertises, in the 'rate' buffer, at what rate a message of
+	 * a given template can be sent.
+	 *
+	 * On P10, PAU only knows about TL templates 0 -> 3.
+	 * Per the spec, template 0 must be supported.
+	 */
+	if (!pau_opencapi_is_templ_supported(0, capabilities))
+		return OPAL_PARAMETER;
+
+	/* Start from the current setting with templates 1 -> 3 disabled */
+	reg = PAU_OTL_MISC_CFG_TX(dev->index);
+	val = pau_read(pau, reg);
+	val &= ~(PAU_OTL_MISC_CFG_TX_TEMP1_EN |
+		 PAU_OTL_MISC_CFG_TX_TEMP2_EN |
+		 PAU_OTL_MISC_CFG_TX_TEMP3_EN);
+
+	for (templ = 0; templ < 4; templ++) {
+		/* The tx rate is set for every template, 0 included */
+		first_bit = 8 + 4 * templ;
+		templ_rate = pau_opencapi_get_templ_rate(templ, rate_buf);
+		val = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(first_bit, first_bit + 3),
+			       val, templ_rate);
+
+		/* Template 0 is implicitly enabled: no enable bit to set */
+		if (templ == 0)
+			continue;
+
+		/* Enable any other template the AFU supports */
+		if (pau_opencapi_is_templ_supported(templ, capabilities))
+			val |= PAU_OTL_MISC_CFG_TX_TEMP_EN(templ);
+	}
+	pau_write(pau, reg, val);
+	PAUDEVDBG(dev, "OTL configuration register set to %llx\n", val);
+
+	return OPAL_SUCCESS;
+}
+
#define CQ_CTL_STATUS_TIMEOUT 10 /* milliseconds */
static int pau_opencapi_set_fence_control(struct pau_dev *dev,
diff --git a/include/pau-regs.h b/include/pau-regs.h
index 7a5aaa5..57c2d72 100644
--- a/include/pau-regs.h
+++ b/include/pau-regs.h
@@ -118,6 +118,7 @@
#define PAU_OTL_MISC_CFG0_EN PPC_BIT(0)
#define PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE PPC_BIT(1)
#define PAU_OTL_MISC_CFG0_BRICKID PPC_BITMASK(2, 3)
+#define PAU_OTL_MISC_CFG0_PE_MASK PPC_BITMASK(4, 7)
#define PAU_OTL_MISC_CFG0_ENABLE_4_0 PPC_BIT(51)
#define PAU_OTL_MISC_CFG0_XLATE_RELEASE PPC_BIT(62)
#define PAU_OTL_MISC_CFG0_ENABLE_5_0 PPC_BIT(63)
@@ -132,11 +133,16 @@
#define PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2 PPC_BITMASK(48, 55)
#define PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3 PPC_BITMASK(56, 63)
#define PAU_OTL_MISC_CFG_TX(brk) (PAU_BLOCK_OTL(brk) + 0x058)
+#define PAU_OTL_MISC_CFG_TX_TEMP1_EN PPC_BIT(1) /* enable bit for TL template 1 */
+#define PAU_OTL_MISC_CFG_TX_TEMP2_EN PPC_BIT(2) /* enable bit for TL template 2 */
+#define PAU_OTL_MISC_CFG_TX_TEMP3_EN PPC_BIT(3) /* enable bit for TL template 3 */
+#define PAU_OTL_MISC_CFG_TX_TEMP_EN(n) PPC_BIT(n) /* same bit as TEMPn_EN for n = 1 -> 3 */
#define PAU_OTL_MISC_CFG_TX_DRDY_WAIT PPC_BITMASK(5, 7)
#define PAU_OTL_MISC_CFG_TX_TEMP0_RATE PPC_BITMASK(8, 11)
#define PAU_OTL_MISC_CFG_TX_TEMP1_RATE PPC_BITMASK(12, 15)
#define PAU_OTL_MISC_CFG_TX_TEMP2_RATE PPC_BITMASK(16, 19)
#define PAU_OTL_MISC_CFG_TX_TEMP3_RATE PPC_BITMASK(20, 23)
+#define PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1) PPC_BITMASK(nib0, nib1) /* e.g. (8, 11) is the same mask as TEMP0_RATE */
#define PAU_OTL_MISC_CFG_TX_CRET_FREQ PPC_BITMASK(32, 34)
#define PAU_OTL_MISC_OTL_REM0(brk) (PAU_BLOCK_OTL(brk) + 0x068)
#define PAU_OTL_MISC_ERROR_SIG_RXI(brk) (PAU_BLOCK_OTL(brk) + 0x070)
@@ -150,11 +156,18 @@
#define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk) (PAU_BLOCK_OTL_PSL(brk) + 0x018)
/* XSL block registers */
+#define PAU_XSL_OSL_SPAP_AN(brk) (PAU_BLOCK_XSL + 0x000 + (brk) * 8) /* indexed by brk, 8-byte stride */
+#define PAU_XSL_OSL_SPAP_AN_EN PPC_BIT(63) /* set together with a non-zero address by pau_opencapi_spa_setup() */
#define PAU_XSL_WRAP_CFG (PAU_BLOCK_XSL + 0x100)
#define PAU_XSL_WRAP_CFG_CLOCK_ENABLE PPC_BIT(0)
#define PAU_XSL_OSL_XLATE_CFG(brk) (PAU_BLOCK_XSL + 0x040 + (brk) * 8)
#define PAU_XSL_OSL_XLATE_CFG_AFU_DIAL PPC_BIT(0)
#define PAU_XSL_OSL_XLATE_CFG_OPENCAPI3 PPC_BIT(32)
+#define PAU_XSL_OSL_CCINV (PAU_BLOCK_XSL + 0x070) /* written by pau_opencapi_spa_clear_cache() */
+#define PAU_XSL_OSL_CCINV_REMOVE PPC_BIT(15) /* set on every request issued by pau_opencapi_spa_clear_cache() */
+#define PAU_XSL_OSL_CCINV_PENDING PPC_BIT(16) /* polled after a request until it reads back as 0 */
+#define PAU_XSL_OSL_CCINV_BRICK PPC_BIT(47) /* set when the device index is non-zero */
+#define PAU_XSL_OSL_CCINV_PE_HANDLE PPC_BITMASK(48, 62) /* PE handle targeted by the request */
/* XTS block registers */
#define PAU_XTS_CFG (PAU_BLOCK_PAU_XTS + 0x020)
diff --git a/include/pau.h b/include/pau.h
index c0a0940..894007d 100644
--- a/include/pau.h
+++ b/include/pau.h
@@ -200,6 +200,13 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
}
void pau_opencapi_dump_scoms(struct pau *pau);
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+ uint64_t addr, uint64_t PE_mask); /* addr == 0 disables the SPA; OPAL_BUSY if already in the requested state */
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+ uint32_t __unused bdfn,
+ uint64_t PE_handle); /* OPAL_BUSY if an invalidation is pending, OPAL_HARDWARE if it does not complete */
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+ long capabilities, char *rate_buf); /* OPAL_PARAMETER unless template 0 is set in capabilities */
/* PHY */
int pau_dev_phy_reset(struct pau_dev *dev);