diff options
-rw-r--r-- | hw/npu-opal.c | 11 | ||||
-rw-r--r-- | hw/pau.c | 154 | ||||
-rw-r--r-- | include/pau-regs.h | 13 | ||||
-rw-r--r-- | include/pau.h | 7 |
4 files changed, 185 insertions, 0 deletions
diff --git a/hw/npu-opal.c b/hw/npu-opal.c
index 73158b1..cf13690 100644
--- a/hw/npu-opal.c
+++ b/hw/npu-opal.c
@@ -7,6 +7,7 @@
 #include <pci.h>
 #include <phb4.h>
 #include <npu2.h>
+#include <pau.h>
 
 #define TL_RATE_BUF_SIZE	32
 
@@ -184,6 +185,9 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
 	if (phb->phb_type == phb_type_npu_v2_opencapi)
 		return npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
 
+	if (phb->phb_type == phb_type_pau_opencapi)
+		return pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
+
 	return OPAL_PARAMETER;
 }
 opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4);
@@ -202,6 +206,9 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
 	if (phb->phb_type == phb_type_npu_v2_opencapi)
 		return npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
 
+	if (phb->phb_type == phb_type_pau_opencapi)
+		return pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
+
 	return OPAL_PARAMETER;
 }
 opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3);
@@ -222,6 +229,10 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
 		return npu2_opencapi_tl_set(phb, bdfn, capabilities,
 					    rate);
 
+	if (phb->phb_type == phb_type_pau_opencapi)
+		return pau_opencapi_tl_set(phb, bdfn, capabilities,
+					   rate);
+
 	return OPAL_PARAMETER;
 }
 opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
diff --git a/hw/pau.c b/hw/pau.c
--- a/hw/pau.c
+++ b/hw/pau.c
@@ -14,6 +14,9 @@
 #define PAU_MAX_PE_NUM		16
 #define PAU_RESERVED_PE_NUM	15
 
+#define PAU_TL_MAX_TEMPLATE	63
+#define PAU_TL_RATE_BUF_SIZE	32
+
 #define PAU_SLOT_NORMAL		PCI_SLOT_STATE_NORMAL
 #define PAU_SLOT_LINK		PCI_SLOT_STATE_LINK
 #define   PAU_SLOT_LINK_START	(PAU_SLOT_LINK + 1)
@@ -265,6 +268,157 @@ static void pau_device_detect_fixup(struct pau_dev *dev)
 	dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
 }
 
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t addr, uint64_t PE_mask)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau = dev->pau;
+	uint64_t reg, val;
+	int64_t rc;
+
+	lock(&pau->lock);
+
+	reg = PAU_XSL_OSL_SPAP_AN(dev->index);
+	val = pau_read(pau, reg);
+	if ((addr && (val & PAU_XSL_OSL_SPAP_AN_EN)) ||
+	    (!addr && !(val & PAU_XSL_OSL_SPAP_AN_EN))) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	/* SPA is disabled by passing a NULL address */
+	val = addr;
+	if (addr)
+		val = addr | PAU_XSL_OSL_SPAP_AN_EN;
+	pau_write(pau, reg, val);
+
+	/*
+	 * set the PE mask that the OS uses for PASID -> PE handle
+	 * conversion
+	 */
+	reg = PAU_OTL_MISC_CFG0(dev->index);
+	val = pau_read(pau, reg);
+	val = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, val, PE_mask);
+	pau_write(pau, reg, val);
+	rc = OPAL_SUCCESS;
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+				     uint32_t __unused bdfn,
+				     uint64_t PE_handle)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau = dev->pau;
+	uint64_t reg, val;
+	int64_t rc, retries = 5;
+
+	lock(&pau->lock);
+
+	reg = PAU_XSL_OSL_CCINV;
+	val = pau_read(pau, reg);
+	if (val & PAU_XSL_OSL_CCINV_PENDING) {
+		rc = OPAL_BUSY;
+		goto out;
+	}
+
+	val = PAU_XSL_OSL_CCINV_REMOVE;
+	val = SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, val, PE_handle);
+	if (dev->index)
+		val |= PAU_XSL_OSL_CCINV_BRICK;
+	pau_write(pau, reg, val);
+
+	rc = OPAL_HARDWARE;
+	while (retries--) {
+		val = pau_read(pau, reg);
+		if (!(val & PAU_XSL_OSL_CCINV_PENDING)) {
+			rc = OPAL_SUCCESS;
+			break;
+		}
+		/* the bit is expected to flip in less than 200us */
+		time_wait_us(200);
+	}
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+static int pau_opencapi_get_templ_rate(unsigned int templ,
+				       char *rate_buf)
+{
+	int shift, idx, val;
+
+	/*
+	 * Each rate is encoded over 4 bits (0->15), with 15 being the
+	 * slowest. The buffer is a succession of rates for all the
+	 * templates. The first 4 bits are for template 63, followed
+	 * by 4 bits for template 62, ... etc. So the rate for
+	 * template 0 is at the very end of the buffer.
+	 */
+	idx = (PAU_TL_MAX_TEMPLATE - templ) / 2;
+	shift = 4 * (1 - ((PAU_TL_MAX_TEMPLATE - templ) % 2));
+	val = (rate_buf[idx] >> shift) & 0xf; /* keep only this nibble */
+	return val;
+}
+
+static bool pau_opencapi_is_templ_supported(unsigned int templ,
+					    long capabilities)
+{
+	return !!(capabilities & (1ull << templ));
+}
+
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+			    long capabilities, char *rate_buf)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau;
+	uint64_t reg, val, templ_rate;
+	int i, rate_pos;
+
+	if (!dev)
+		return OPAL_PARAMETER;
+	pau = dev->pau;
+
+	/* The 'capabilities' argument defines what TL template the
+	 * device can receive. OpenCAPI 5.0 defines 64 templates, so
+	 * that's one bit per template.
+	 *
+	 * For each template, the device processing time may vary, so
+	 * the device advertises at what rate a message of a given
+	 * template can be sent. That's encoded in the 'rate' buffer.
+	 *
+	 * On P10, PAU only knows about TL templates 0 -> 3.
+	 * Per the spec, template 0 must be supported.
+	 */
+	if (!pau_opencapi_is_templ_supported(0, capabilities))
+		return OPAL_PARAMETER;
+
+	reg = PAU_OTL_MISC_CFG_TX(dev->index);
+	val = pau_read(pau, reg);
+	val &= ~PAU_OTL_MISC_CFG_TX_TEMP1_EN;
+	val &= ~PAU_OTL_MISC_CFG_TX_TEMP2_EN;
+	val &= ~PAU_OTL_MISC_CFG_TX_TEMP3_EN;
+
+	for (i = 0; i < 4; i++) {
+		/* Skip template 0 as it is implicitly enabled.
+		 * Enable other templates if supported by the AFU
+		 */
+		if (i && pau_opencapi_is_templ_supported(i, capabilities))
+			val |= PAU_OTL_MISC_CFG_TX_TEMP_EN(i);
+		/* The tx rate should still be set for template 0 */
+		templ_rate = pau_opencapi_get_templ_rate(i, rate_buf);
+		rate_pos = 8 + i * 4;
+		val = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(rate_pos, rate_pos + 3),
+			       val, templ_rate);
+	}
+	pau_write(pau, reg, val);
+	PAUDEVDBG(dev, "OTL configuration register set to %llx\n", val);
+
+	return OPAL_SUCCESS;
+}
+
 #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
 
 static int pau_opencapi_set_fence_control(struct pau_dev *dev,
diff --git a/include/pau-regs.h b/include/pau-regs.h
index 7a5aaa5..57c2d72 100644
--- a/include/pau-regs.h
+++ b/include/pau-regs.h
@@ -118,6 +118,7 @@
 #define   PAU_OTL_MISC_CFG0_EN				PPC_BIT(0)
 #define   PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE		PPC_BIT(1)
 #define   PAU_OTL_MISC_CFG0_BRICKID			PPC_BITMASK(2, 3)
+#define   PAU_OTL_MISC_CFG0_PE_MASK			PPC_BITMASK(4, 7)
 #define   PAU_OTL_MISC_CFG0_ENABLE_4_0			PPC_BIT(51)
 #define   PAU_OTL_MISC_CFG0_XLATE_RELEASE		PPC_BIT(62)
 #define   PAU_OTL_MISC_CFG0_ENABLE_5_0			PPC_BIT(63)
@@ -132,11 +133,16 @@
 #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2		PPC_BITMASK(48, 55)
 #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3		PPC_BITMASK(56, 63)
 #define PAU_OTL_MISC_CFG_TX(brk)			(PAU_BLOCK_OTL(brk) + 0x058)
+#define   PAU_OTL_MISC_CFG_TX_TEMP1_EN			PPC_BIT(1)
+#define   PAU_OTL_MISC_CFG_TX_TEMP2_EN			PPC_BIT(2)
+#define   PAU_OTL_MISC_CFG_TX_TEMP3_EN			PPC_BIT(3)
+#define   PAU_OTL_MISC_CFG_TX_TEMP_EN(n)		PPC_BIT(n)
 #define   PAU_OTL_MISC_CFG_TX_DRDY_WAIT			PPC_BITMASK(5, 7)
 #define   PAU_OTL_MISC_CFG_TX_TEMP0_RATE		PPC_BITMASK(8, 11)
 #define   PAU_OTL_MISC_CFG_TX_TEMP1_RATE		PPC_BITMASK(12, 15)
 #define   PAU_OTL_MISC_CFG_TX_TEMP2_RATE		PPC_BITMASK(16, 19)
 #define   PAU_OTL_MISC_CFG_TX_TEMP3_RATE		PPC_BITMASK(20, 23)
+#define   PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1)	PPC_BITMASK(nib0, nib1)
 #define   PAU_OTL_MISC_CFG_TX_CRET_FREQ			PPC_BITMASK(32, 34)
 #define PAU_OTL_MISC_OTL_REM0(brk)			(PAU_BLOCK_OTL(brk) + 0x068)
 #define PAU_OTL_MISC_ERROR_SIG_RXI(brk)			(PAU_BLOCK_OTL(brk) + 0x070)
@@ -150,11 +156,18 @@
 #define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk)		(PAU_BLOCK_OTL_PSL(brk) + 0x018)
 
 /* XSL block registers */
+#define PAU_XSL_OSL_SPAP_AN(brk)			(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
+#define   PAU_XSL_OSL_SPAP_AN_EN			PPC_BIT(63)
 #define PAU_XSL_WRAP_CFG				(PAU_BLOCK_XSL + 0x100)
 #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE			PPC_BIT(0)
 #define PAU_XSL_OSL_XLATE_CFG(brk)			(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
 #define   PAU_XSL_OSL_XLATE_CFG_AFU_DIAL		PPC_BIT(0)
 #define   PAU_XSL_OSL_XLATE_CFG_OPENCAPI3		PPC_BIT(32)
+#define PAU_XSL_OSL_CCINV				(PAU_BLOCK_XSL + 0x070)
+#define   PAU_XSL_OSL_CCINV_REMOVE			PPC_BIT(15)
+#define   PAU_XSL_OSL_CCINV_PENDING			PPC_BIT(16)
+#define   PAU_XSL_OSL_CCINV_BRICK			PPC_BIT(47)
+#define   PAU_XSL_OSL_CCINV_PE_HANDLE			PPC_BITMASK(48, 62)
 
 /* XTS block registers */
 #define PAU_XTS_CFG					(PAU_BLOCK_PAU_XTS + 0x020)
diff --git a/include/pau.h b/include/pau.h
index c0a0940..894007d 100644
--- a/include/pau.h
+++ b/include/pau.h
@@ -200,6 +200,13 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
 }
 
 void pau_opencapi_dump_scoms(struct pau *pau);
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t addr, uint64_t PE_mask);
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+				     uint32_t __unused bdfn,
+				     uint64_t PE_handle);
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+			    long capabilities, char *rate_buf);
 
 /* PHY */
 int pau_dev_phy_reset(struct pau_dev *dev);