diff options
author | Reza Arbab <arbab@linux.ibm.com> | 2019-07-17 15:44:25 -0500 |
---|---|---|
committer | Oliver O'Halloran <oohall@gmail.com> | 2019-07-26 15:30:21 +1000 |
commit | aa3fc69fef205fc4e8d2ad2d65a5d1e3fa8b1ec7 (patch) | |
tree | d210ab3cde38ba164879b5e4f679fd8d9efae906 | |
parent | d9b06b855e8b858976491c19ab9bd792e3a4c3e5 (diff) | |
download | skiboot-aa3fc69fef205fc4e8d2ad2d65a5d1e3fa8b1ec7.zip skiboot-aa3fc69fef205fc4e8d2ad2d65a5d1e3fa8b1ec7.tar.gz skiboot-aa3fc69fef205fc4e8d2ad2d65a5d1e3fa8b1ec7.tar.bz2 |
hw: Introduce npu3
POWER9P systems have been upgraded with NVLink 3.0 interconnects. The
underlying hardware is fundamentally different--each POWER9 chip has
(1 NPU) * (3 stacks) * (2 bricks) = (6 links)
whereas in each POWER9P chip, there are
(3 NPUs) * (4 bricks) = (12 links)
This flatter hierarchy simplifies the firmware implementation a bit, but
also prevents sharing much common code with npu2.
As in previous versions, initialize the hardware and expose each link to
the OS as a virtual PCIe device. This initial support covers NVLink
devices only, with OpenCAPI to follow.
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
-rw-r--r-- | core/init.c | 1 | ||||
-rw-r--r-- | hw/Makefile.inc | 2 | ||||
-rw-r--r-- | hw/npu-opal.c | 38 | ||||
-rw-r--r-- | hw/npu3-hw-procedures.c | 799 | ||||
-rw-r--r-- | hw/npu3-nvlink.c | 1830 | ||||
-rw-r--r-- | hw/npu3.c | 554 | ||||
-rw-r--r-- | include/npu3-regs.h | 248 | ||||
-rw-r--r-- | include/npu3.h | 180 | ||||
-rw-r--r-- | include/pci.h | 1 | ||||
-rw-r--r-- | include/platform.h | 4 | ||||
-rw-r--r-- | include/skiboot.h | 1 | ||||
-rw-r--r-- | include/xscom-p9-regs.h | 19 |
12 files changed, 3667 insertions, 10 deletions
diff --git a/core/init.c b/core/init.c index 7fa2fb6..25d827f 100644 --- a/core/init.c +++ b/core/init.c @@ -1226,6 +1226,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt) /* Probe NPUs */ probe_npu(); probe_npu2(); + probe_npu3(); /* Initialize PCI */ pci_init_slots(); diff --git a/hw/Makefile.inc b/hw/Makefile.inc index 2c8963c..b708bdf 100644 --- a/hw/Makefile.inc +++ b/hw/Makefile.inc @@ -8,7 +8,7 @@ HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o -HW_OBJS += npu-opal.o +HW_OBJS += npu-opal.o npu3.o npu3-nvlink.o npu3-hw-procedures.o HW=hw/built-in.a include $(SRC)/hw/fsp/Makefile.inc diff --git a/hw/npu-opal.c b/hw/npu-opal.c index f106c73..208ec51 100644 --- a/hw/npu-opal.c +++ b/hw/npu-opal.c @@ -18,16 +18,23 @@ #include <pci.h> #include <phb4.h> #include <npu2.h> +#include <npu3.h> static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused, uint64_t msr, uint64_t bdf) { struct phb *phb = pci_get_phb(phb_id); - if (!phb || phb->phb_type != phb_type_npu_v2) + if (!phb) return OPAL_PARAMETER; - return npu2_init_context(phb, msr, bdf); + if (phb->phb_type == phb_type_npu_v2) + return npu2_init_context(phb, msr, bdf); + + if (phb->phb_type == phb_type_npu_v3) + return npu3_init_context(phb, msr, bdf); + + return OPAL_PARAMETER; } opal_call(OPAL_NPU_INIT_CONTEXT, opal_npu_init_context, 4); @@ -36,10 +43,16 @@ static int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused, { struct phb *phb = pci_get_phb(phb_id); - if (!phb || phb->phb_type != phb_type_npu_v2) + if (!phb) return OPAL_PARAMETER; - return npu2_destroy_context(phb, bdf); + if (phb->phb_type == phb_type_npu_v2) + return npu2_destroy_context(phb, bdf); + + if (phb->phb_type == phb_type_npu_v3) + return npu3_destroy_context(phb, bdf); + + 
return OPAL_PARAMETER; } opal_call(OPAL_NPU_DESTROY_CONTEXT, opal_npu_destroy_context, 3); @@ -48,10 +61,16 @@ static int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, { struct phb *phb = pci_get_phb(phb_id); - if (!phb || phb->phb_type != phb_type_npu_v2) + if (!phb) return OPAL_PARAMETER; - return npu2_map_lpar(phb, bdf, lparid, lpcr); + if (phb->phb_type == phb_type_npu_v2) + return npu2_map_lpar(phb, bdf, lparid, lpcr); + + if (phb->phb_type == phb_type_npu_v3) + return npu3_map_lpar(phb, bdf, lparid, lpcr); + + return OPAL_PARAMETER; } opal_call(OPAL_NPU_MAP_LPAR, opal_npu_map_lpar, 4); @@ -81,10 +100,13 @@ static int64_t npu_set_relaxed_order(uint32_t gcid, int pec, bool enable) int64_t rc; for_each_phb(phb) { - if (phb->phb_type != phb_type_npu_v2) + if (phb->phb_type == phb_type_npu_v2) + rc = npu2_set_relaxed_order(phb, gcid, pec, enable); + else if (phb->phb_type == phb_type_npu_v3) + rc = npu3_set_relaxed_order(phb, gcid, pec, enable); + else continue; - rc = npu2_set_relaxed_order(phb, gcid, pec, enable); if (rc) return rc; } diff --git a/hw/npu3-hw-procedures.c b/hw/npu3-hw-procedures.c new file mode 100644 index 0000000..eb74496 --- /dev/null +++ b/hw/npu3-hw-procedures.c @@ -0,0 +1,799 @@ +/* Copyright 2019 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <skiboot.h>
+#include <npu3.h>
+#include <npu3-regs.h>
+#include <timebase.h>
+#include <xscom.h>
+#include <xscom-p9-regs.h>
+
+/* Log with an "NPU[chip_id:npu index:dev index]" prefix. */
+#define NPU3DEVLOG(l, dev, fmt, a...)		\
+	prlog(l, "NPU[%d:%d:%d]: " fmt,		\
+	      (dev)->npu->chip_id,		\
+	      (dev)->npu->index,		\
+	      (dev)->index, ##a)
+#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
+#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
+#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
+
+/*
+ * The documentation for the PHY training is written in terms of bits within an
+ * actual register so we use that representation here.
+ */
+struct npu3_phy_reg {
+	uint64_t offset;
+	uint64_t mask;
+};
+
+/*
+ * Per-lane registers. Every offset in this first group is below 0x200,
+ * which is what npu3_phy_scom() asserts when a lane is supplied.
+ */
+static struct npu3_phy_reg
+NPU3_PHY_RX_RUN_LANE		    = { 0x0c8, PPC_BIT(48) },
+NPU3_PHY_RX_IORESET		    = { 0x096, PPC_BIT(63) },
+NPU3_PHY_TX_IORESET		    = { 0x113, PPC_BIT(48) },
+NPU3_PHY_RX_PR_RESET		    = { 0x096, PPC_BIT(62) },
+NPU3_PHY_RX_LANE_ANA_PDWN	    = { 0x002, PPC_BIT(54) },
+NPU3_PHY_RX_LANE_DIG_PDWN	    = { 0x088, PPC_BIT(48) },
+NPU3_PHY_RX_PR_PHASE_STEP	    = { 0x08a, PPC_BITMASK(60, 63) },
+NPU3_PHY_TX_LANE_PDWN		    = { 0x101, PPC_BIT(48) },
+NPU3_PHY_RX_RUN_DCCAL		    = { 0x0c8, PPC_BIT(49) },
+NPU3_PHY_RX_DCCAL_DONE		    = { 0x0ca, PPC_BIT(49) },
+NPU3_PHY_RX_LANE_BUSY		    = { 0x0ca, PPC_BIT(50) },
+NPU3_PHY_RX_B_BANK_CONTROLS	    = { 0x002, PPC_BITMASK(58, 63) },
+NPU3_PHY_TX_UNLOAD_CLK_DISABLE	    = { 0x103, PPC_BIT(56) },
+NPU3_PHY_TX_FIFO_INIT		    = { 0x105, PPC_BIT(53) },
+NPU3_PHY_TX_RXCAL		    = { 0x103, PPC_BIT(57) },
+NPU3_PHY_RX_INIT_DONE		    = { 0x0ca, PPC_BIT(48) },
+NPU3_PHY_RX_PR_EDGE_TRACK_CNTL	    = { 0x092, PPC_BITMASK(48, 49) },
+NPU3_PHY_RX_PR_FW_OFF		    = { 0x08a, PPC_BIT(56) },
+NPU3_PHY_RX_PR_FW_INERTIA_AMT	    = { 0x08a, PPC_BITMASK(57, 59) },
+NPU3_PHY_RX_CFG_LTE_MC		    = { 0x000, PPC_BITMASK(60, 63) },
+NPU3_PHY_RX_A_INTEG_COARSE_GAIN	    = { 0x00a, PPC_BITMASK(48, 51) },
+NPU3_PHY_RX_B_INTEG_COARSE_GAIN	    = { 0x026, PPC_BITMASK(48, 51) },
+NPU3_PHY_RX_E_INTEG_COARSE_GAIN	    = { 0x030, PPC_BITMASK(48, 51) },
+
+/* These registers are per-PHY, not per lane */
+NPU3_PHY_TX_ZCAL_SWO_EN		    = { 0x3c9, PPC_BIT(48) },
+NPU3_PHY_TX_ZCAL_REQ		    = { 0x3c1, PPC_BIT(49) },
+NPU3_PHY_TX_ZCAL_DONE		    = { 0x3c1, PPC_BIT(50) },
+NPU3_PHY_TX_ZCAL_ERROR		    = { 0x3c1, PPC_BIT(51) },
+NPU3_PHY_TX_ZCAL_N		    = { 0x3c3, PPC_BITMASK(48, 56) },
+NPU3_PHY_TX_ZCAL_P		    = { 0x3c5, PPC_BITMASK(48, 56) },
+NPU3_PHY_TX_PSEG_PRE_EN		    = { 0x34d, PPC_BITMASK(51, 55) },
+NPU3_PHY_TX_PSEG_PRE_SELECT	    = { 0x34d, PPC_BITMASK(56, 60) },
+NPU3_PHY_TX_NSEG_PRE_EN		    = { 0x34f, PPC_BITMASK(51, 55) },
+NPU3_PHY_TX_NSEG_PRE_SELECT	    = { 0x34f, PPC_BITMASK(56, 60) },
+NPU3_PHY_TX_PSEG_POST_EN	    = { 0x361, PPC_BITMASK(49, 55) },
+NPU3_PHY_TX_PSEG_POST_SELECT	    = { 0x361, PPC_BITMASK(56, 62) },
+NPU3_PHY_TX_NSEG_POST_EN	    = { 0x363, PPC_BITMASK(49, 55) },
+NPU3_PHY_TX_NSEG_POST_SELECT	    = { 0x363, PPC_BITMASK(56, 62) },
+NPU3_PHY_TX_PSEG_MARGINPU_EN	    = { 0x351, PPC_BITMASK(48, 55) },
+NPU3_PHY_TX_NSEG_MARGINPU_EN	    = { 0x353, PPC_BITMASK(48, 55) },
+NPU3_PHY_TX_PSEG_MARGINPD_EN	    = { 0x351, PPC_BITMASK(56, 63) },
+NPU3_PHY_TX_NSEG_MARGINPD_EN	    = { 0x353, PPC_BITMASK(56, 63) },
+NPU3_PHY_TX_MARGINPU_SELECT	    = { 0x355, PPC_BITMASK(48, 55) },
+NPU3_PHY_TX_MARGINPD_SELECT	    = { 0x355, PPC_BITMASK(56, 63) },
+NPU3_PHY_TX_PSEG_MAIN_EN	    = { 0x357, PPC_BITMASK(51, 57) },
+NPU3_PHY_TX_NSEG_MAIN_EN	    = { 0x359, PPC_BITMASK(51, 57) },
+NPU3_PHY_RX_CLKDIST_PDWN	    = { 0x204, PPC_BITMASK(48, 50) },
+NPU3_PHY_RX_IREF_PDWN		    = { 0x230, PPC_BIT(54) },
+NPU3_PHY_TX_CLKDIST_PDWN	    = { 0x305, PPC_BITMASK(48, 50) },
+NPU3_PHY_RX_CTL_DATASM_CLKDIST_PDWN = { 0x2e0, PPC_BIT(60) };
+
+/*
+ * Build the indirect SCOM address used to access a PHY register.
+ *
+ * @dev:  link whose OB chiplet is addressed
+ * @reg:  register descriptor; reg->offset goes into address bits 12:21
+ * @lane: lane number for a per-lane register (goes into bits 27:31), or a
+ *        negative value for a per-PHY register
+ *
+ * Asserts that a lane is given exactly for the registers below 0x200.
+ */
+static uint64_t npu3_phy_scom(struct npu3_dev *dev, struct npu3_phy_reg *reg,
+			      int lane)
+{
+	uint64_t scom;
+
+	scom = OB_INDIRECT(dev->ob_chiplet);
+	scom = SETFIELD(PPC_BITMASK(12, 21), scom, reg->offset);
+
+	if (lane >= 0) {
+		/*
+		 * Program the lane field for every valid lane, lane 0
+		 * included, rather than relying on the OB_INDIRECT() base
+		 * already having those bits clear.
+		 */
+		assert(reg->offset < 0x200);
+		scom = SETFIELD(PPC_BITMASK(27, 31), scom, lane);
+	} else {
+		/* Don't specify a lane for a non-per-lane register */
+		assert(reg->offset >= 0x200);
+	}
+
+	return scom;
+}
+
+/*
+ * Read-modify-write the field described by @reg with @val.
+ *
+ * NOTE(review): the return value of xscom_read() is not checked, so the
+ * value written back depends on scom_val having been filled in.
+ */
+static void npu3_phy_write_lane(struct npu3_dev *dev, struct npu3_phy_reg *reg,
+				int lane, uint64_t val)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t scom, scom_val;
+
+	scom = npu3_phy_scom(dev, reg, lane);
+
+	xscom_read(npu->chip_id, scom, &scom_val);
+	scom_val = SETFIELD(reg->mask, scom_val, val);
+	xscom_write(npu->chip_id, scom, scom_val);
+}
+
+/* Read the register and return only the field selected by reg->mask. */
+static uint64_t npu3_phy_read_lane(struct npu3_dev *dev,
+				   struct npu3_phy_reg *reg,
+				   int lane)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t scom, scom_val;
+
+	scom = npu3_phy_scom(dev, reg, lane);
+	xscom_read(npu->chip_id, scom, &scom_val);
+
+	return GETFIELD(reg->mask, scom_val);
+}
+
+/* Per-PHY write: same as npu3_phy_write_lane() with no lane (-1). */
+static inline void npu3_phy_write(struct npu3_dev *dev,
+				  struct npu3_phy_reg *reg,
+				  uint64_t val)
+{
+	npu3_phy_write_lane(dev, reg, -1, val);
+}
+
+/* Per-PHY read: same as npu3_phy_read_lane() with no lane (-1). */
+static inline uint64_t npu3_phy_read(struct npu3_dev *dev,
+				     struct npu3_phy_reg *reg)
+{
+	return npu3_phy_read_lane(dev, reg, -1);
+}
+
+/* A named sequence of step functions, each returning NPU3_PROC_* flags. */
+struct procedure {
+	const char *name;
+	uint32_t (*steps[])(struct npu3_dev *);
+};
+
+/*
+ * Define "procedure_<NAME>" whose first step is the function NAME itself,
+ * followed by any further STEPS.
+ */
+#define DEFINE_PROCEDURE(NAME, STEPS...)		\
+static struct procedure procedure_##NAME = {		\
+	.name = #NAME,					\
+	.steps = { NAME, ##STEPS }			\
+}
+
+/* Single-step procedure that reports completion with the ABORTED flag. */
+static uint32_t stop(struct npu3_dev *npu_dev __unused)
+{
+	return NPU3_PROC_COMPLETE | NPU3_PROC_ABORTED;
+}
+
+DEFINE_PROCEDURE(stop);
+
+/* Single-step procedure that does nothing and reports completion. */
+static uint32_t nop(struct npu3_dev *npu_dev __unused)
+{
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(nop);
+
+/* Set or clear this link's NV_IOVALID field in OB_CPLT_CONF1. */
+static void set_iovalid(struct npu3_dev *dev, bool raise)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t reg, val;
+
+	reg = OB_CPLT_CONF1(dev->ob_chiplet);
+
+	xscom_read(npu->chip_id, reg, &val);
+	val = SETFIELD(OB_CPLT_CONF1_NV_IOVALID(dev->index), val, raise);
+	xscom_write(npu->chip_id, reg, val);
+}
+
+#define NPU3_PHY_LANES 24
+
+/*
+ * Run the following statement for each lane number whose bit is set in
+ * dev->phy_lane_mask. Expands to "for (...) if (...)", so it must be
+ * followed by exactly one statement or a braced block.
+ */
+#define npu3_for_each_lane(lane, dev)				\
+	for (lane = 0; lane < NPU3_PHY_LANES; lane++)		\
+		if ((dev)->phy_lane_mask & PPC_BIT32(lane))
+
+/* Step 1 of phy_reset: drop iovalid and clear RX_RUN_LANE on each lane. */
+static uint32_t phy_reset(struct npu3_dev *dev)
+{
+	int lane;
+
+	set_iovalid(dev, false);
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_LANE, lane, 0);
+
+	return NPU3_PROC_NEXT;
+}
+
+/*
+ * Step 2 of phy_reset: stays INPROGRESS while any lane reports busy, then
+ * pulses the RX/TX I/O resets and the phase rotator reset on every lane.
+ */
+static uint32_t phy_reset_wait(struct npu3_dev *dev)
+{
+	int lane;
+
+	/* Wait for all lanes to become inactive */
+	npu3_for_each_lane(lane, dev)
+		if (npu3_phy_read_lane(dev, &NPU3_PHY_RX_LANE_BUSY, lane))
+			return NPU3_PROC_INPROGRESS;
+
+	npu3_for_each_lane(lane, dev) {
+		/* Set lane in reset */
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_IORESET, lane, 1);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_IORESET, lane, 1);
+
+		/* Release lane from reset */
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_IORESET, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_IORESET, lane, 0);
+
+		/* Reset the phase rotator */
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_RESET, lane, 1);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_RESET, lane, 0);
+	}
+
+	return NPU3_PROC_NEXT;
+}
+
+/* Procedure 1.2.3 - Initialise I/O PHY Registers */
+static uint32_t phy_reset_complete(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev) {
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_ANA_PDWN, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_DIG_PDWN, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_PHASE_STEP, lane, 0xc);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_LANE_PDWN, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_INERTIA_AMT, lane, 4);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_CFG_LTE_MC, lane, 3);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_A_INTEG_COARSE_GAIN, lane, 11);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_B_INTEG_COARSE_GAIN, lane, 11);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_E_INTEG_COARSE_GAIN, lane, 11);
+	}
+
+	set_iovalid(dev, true);
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(phy_reset, phy_reset_wait, phy_reset_complete);
+
+/* Procedure 1.2.6 - I/O PHY Tx Impedance Calibration */
+static uint32_t phy_tx_zcal(struct npu3_dev *dev)
+{
+	/* The result is recorded per NPU; skip if it has already been done. */
+	if (dev->npu->tx_zcal_complete)
+		return NPU3_PROC_COMPLETE;
+
+	/* Turn off SW enable and enable zcal state machine */
+	npu3_phy_write(dev, &NPU3_PHY_TX_ZCAL_SWO_EN, 0);
+
+	/* Start impedance calibration state machine */
+	npu3_phy_write(dev, &NPU3_PHY_TX_ZCAL_REQ, 1);
+
+	return NPU3_PROC_NEXT;
+}
+
+/* Fail if the ZCAL error bit is set; otherwise wait for the done bit. */
+static uint32_t phy_tx_zcal_wait(struct npu3_dev *dev)
+{
+	if (npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_ERROR))
+		return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
+
+	if (!npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_DONE))
+		return NPU3_PROC_INPROGRESS;
+
+	return NPU3_PROC_NEXT;
+}
+
+#define MARGIN_RATIO		0
+#define FFE_PRE_COEFF		0
+#define FFE_POST_COEFF		0
+
+#define PRE_WIDTH		5
+#define POST_WIDTH		7
+#define MAIN_WIDTH		7
+#define ZCAL_MIN		(16 * 2)
+#define ZCAL_MAX		(33 * 2)
+#define PRECURSOR_X2_MAX	(4 * 2 + 1)
+#define POSTCURSOR_X2_MAX	(6 * 2 + 1)
+#define MARGIN_X2_MAX		(8 * 2)
+#define MAIN_X2_MAX		(6 * 2 + 1)
+#define TOTAL_X2_MAX		(PRECURSOR_X2_MAX + POSTCURSOR_X2_MAX + \
+				 2 * MARGIN_X2_MAX + MAIN_X2_MAX)
+
+/*
+ * Thermometer code: a value with the low @dec bits set.
+ * Callers in this file only pass small values; @dec must stay well below
+ * the width of int for the shift to be defined.
+ */
+static uint32_t therm(uint32_t dec)
+{
+	return (0x1 << dec) - 1;
+}
+
+/*
+ * Thermometer code of @dec / 2 in the low bits, with bit (@width - 1) set
+ * when @dec is odd.
+ */
+static uint32_t therm_with_half(uint32_t dec, uint8_t width)
+{
+	/* If the LSB of the 2r equivalent is on, then we need to set the 2r bit (MSB) */
+	uint32_t half_on = (dec & 0x1) << (width - 1);
+
+	/* Shift the 2r equivalent to a 1r value and convert to a thermometer code. */
+	uint32_t x1_equiv = ((1 << (dec >> 1)) - 1);
+
+	/* Combine 1r equivalent thermometer code + the 2r MSB value. */
+	return half_on | x1_equiv;
+}
+
+/*
+ * Final step of phy_tx_zcal: read the calibration results, derive the
+ * P-side and N-side segment enable/select values and program them.
+ *
+ * Fails the procedure if either result is outside [ZCAL_MIN, ZCAL_MAX].
+ * On success sets dev->npu->tx_zcal_complete so the calibration is not
+ * repeated (see phy_tx_zcal()).
+ */
+static uint32_t phy_tx_zcal_calculate(struct npu3_dev *dev)
+{
+	int p_value, n_value;
+	uint32_t zcal_n;
+	uint32_t zcal_p;
+	uint32_t p_main_enable = MAIN_X2_MAX;
+	uint32_t p_margin_pu_enable = MARGIN_X2_MAX;
+	uint32_t p_margin_pd_enable = MARGIN_X2_MAX;
+	uint32_t p_precursor_select;
+	uint32_t p_postcursor_select;
+	uint32_t margin_pu_select;
+	uint32_t n_main_enable = MAIN_X2_MAX;
+	uint32_t n_margin_pu_enable = MARGIN_X2_MAX;
+	uint32_t n_margin_pd_enable = MARGIN_X2_MAX;
+	uint32_t n_precursor_select;
+	uint32_t n_postcursor_select;
+	uint32_t margin_pd_select;
+	uint32_t margin_select;
+
+	/* Convert the value from 8R to 2R by / 4 */
+	zcal_n = npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_N) / 4;
+	zcal_p = npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_P) / 4;
+
+	/*
+	 * Again, if the hardware detects an unexpected condition it's
+	 * better just to fail loudly.
+	 */
+	if (zcal_n < ZCAL_MIN || zcal_n > ZCAL_MAX ||
+	    zcal_p < ZCAL_MIN || zcal_p > ZCAL_MAX)
+		return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
+
+	/* Negative here, since ZCAL_MAX < TOTAL_X2_MAX. */
+	p_value = zcal_p - TOTAL_X2_MAX;
+	p_precursor_select = p_value * FFE_PRE_COEFF / 128;
+	p_postcursor_select = p_value * FFE_POST_COEFF / 128;
+	margin_pu_select = p_value * MARGIN_RATIO / 256;
+
+	/* Make the deficit even by dropping one half-step from main. */
+	if (p_value % 2) {
+		p_main_enable--;
+		p_value++;
+	}
+
+	/* Remove segments two at a time: main first, then the margins. */
+	while (p_value < 0) {
+		if (p_main_enable > 1) {
+			p_main_enable -= 2;
+		} else if (p_margin_pu_enable + p_margin_pd_enable > 0) {
+			if (p_margin_pu_enable == p_margin_pd_enable)
+				p_margin_pd_enable -= 2;
+			else
+				p_margin_pu_enable -= 2;
+		}
+		p_value += 2;
+	}
+
+	n_value = zcal_n - TOTAL_X2_MAX;
+	n_precursor_select = n_value * FFE_PRE_COEFF / 128;
+	n_postcursor_select = n_value * FFE_POST_COEFF / 128;
+	/*
+	 * Derived from the N-side value, mirroring margin_pu_select above.
+	 * This used p_value, which by this point has been rewritten by the
+	 * P-side loop. No effect on the programmed values while MARGIN_RATIO
+	 * is 0.
+	 */
+	margin_pd_select = n_value * MARGIN_RATIO / 256;
+
+	if (n_value % 2) {
+		n_main_enable--;
+		n_value++;
+	}
+
+	while (n_value < 0) {
+		if (n_main_enable > 1) {
+			n_main_enable -= 2;
+		} else if (n_margin_pu_enable + n_margin_pd_enable > 0) {
+			if (n_margin_pu_enable == n_margin_pd_enable)
+				n_margin_pd_enable -= 2;
+			else
+				n_margin_pu_enable -= 2;
+		}
+		n_value += 2;
+	}
+
+	margin_select = therm((margin_pu_select + 1) / 2) &
+			therm((margin_pd_select + 1) / 2) &
+			therm((p_margin_pu_enable + 1) / 2) &
+			therm((p_margin_pd_enable + 1) / 2) &
+			therm((n_margin_pu_enable + 1) / 2) &
+			therm((n_margin_pd_enable + 1) / 2);
+
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_PRE_EN, therm_with_half(PRECURSOR_X2_MAX, PRE_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_PRE_SELECT, therm_with_half(p_precursor_select, PRE_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_POST_EN, therm_with_half(POSTCURSOR_X2_MAX, POST_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_POST_SELECT, therm_with_half(p_postcursor_select, POST_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MARGINPU_EN, therm((p_margin_pu_enable + 1) / 2));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MARGINPD_EN, therm((p_margin_pd_enable + 1) / 2));
+	npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MAIN_EN, therm_with_half(p_main_enable, MAIN_WIDTH));
+
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_PRE_EN, therm_with_half(PRECURSOR_X2_MAX, PRE_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_PRE_SELECT, therm_with_half(n_precursor_select, PRE_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_POST_EN, therm_with_half(POSTCURSOR_X2_MAX, POST_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_POST_SELECT, therm_with_half(n_postcursor_select, POST_WIDTH));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MARGINPU_EN, therm((n_margin_pu_enable + 1) / 2));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MARGINPD_EN, therm((n_margin_pd_enable + 1) / 2));
+	npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MAIN_EN, therm_with_half(n_main_enable, MAIN_WIDTH));
+
+	/*
+	 * NOTE(review): margin_select is already an AND of thermometer codes,
+	 * and every other call in this function uses therm((x + 1) / 2).
+	 * Confirm against the PHY spec whether "therm(x + 1) / 2" is intended.
+	 * Both forms give 0 while MARGIN_RATIO is 0.
+	 */
+	npu3_phy_write(dev, &NPU3_PHY_TX_MARGINPU_SELECT, therm(margin_select + 1) / 2);
+	npu3_phy_write(dev, &NPU3_PHY_TX_MARGINPD_SELECT, therm(margin_select + 1) / 2);
+
+	dev->npu->tx_zcal_complete = true;
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(phy_tx_zcal, phy_tx_zcal_wait, phy_tx_zcal_calculate);
+
+/* Procedure 1.2.4 - I/O PHY DC Calibration */
+static uint32_t phy_rx_dccal(struct npu3_dev *dev)
+{
+	int lane;
+
+	set_iovalid(dev, false);
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_OFF, lane, 1);
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_DCCAL, lane, 1);
+
+	return NPU3_PROC_NEXT;
+}
+
+/*
+ * Stays INPROGRESS until every lane reports DCCAL_DONE, then stops the
+ * calibration and clears the per-lane controls set up for it.
+ */
+static uint32_t phy_rx_dccal_complete(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev)
+		if (!npu3_phy_read_lane(dev, &NPU3_PHY_RX_DCCAL_DONE, lane))
+			return NPU3_PROC_INPROGRESS;
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_DCCAL, lane, 0);
+
+	npu3_for_each_lane(lane, dev) {
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_B_BANK_CONTROLS, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_EDGE_TRACK_CNTL, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_OFF, lane, 0);
+	}
+
+	return NPU3_PROC_NEXT;
+}
+
+/* Procedure 1.2.5 - IO PHY Tx FIFO Init */
+static uint32_t phy_tx_fifo_init(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev) {
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_UNLOAD_CLK_DISABLE, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_FIFO_INIT, lane, 1);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_UNLOAD_CLK_DISABLE, lane, 1);
+	}
+
+	set_iovalid(dev, true);
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(phy_rx_dccal, phy_rx_dccal_complete, phy_tx_fifo_init);
+
+/* Procedure 1.2.8 - Enable Downstream Link Training */
+static uint32_t phy_enable_tx_rxcal(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_RXCAL, lane, 1);
+
+	return NPU3_PROC_COMPLETE;
+}
+DEFINE_PROCEDURE(phy_enable_tx_rxcal);
+
+/* Procedure 1.2.9 - Disable Downstream Link Training */
+static uint32_t phy_disable_tx_rxcal(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_RXCAL, lane, 0);
+
+	return NPU3_PROC_COMPLETE;
+}
+DEFINE_PROCEDURE(phy_disable_tx_rxcal);
+
+/* Procedure 1.2.7 - I/O PHY Upstream Link Training */
+static uint32_t phy_rx_training(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev)
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_LANE, lane, 1);
+
+	return NPU3_PROC_NEXT;
+}
+
+/* Stays INPROGRESS until every lane reports RX_INIT_DONE. */
+static uint32_t phy_rx_training_wait(struct npu3_dev *dev)
+{
+	int lane;
+
+	npu3_for_each_lane(lane, dev)
+		if (!npu3_phy_read_lane(dev, &NPU3_PHY_RX_INIT_DONE, lane))
+			return NPU3_PROC_INPROGRESS;
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(phy_rx_training, phy_rx_training_wait);
+
+/* Request a fence state by writing the NTL_RESET field of MISC_CFG1. */
+static void npu3_dev_fence_set(struct npu3_dev *dev, uint8_t state)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t val;
+
+	val = npu3_read(npu, NPU3_NTL_MISC_CFG1(dev->index));
+	val = SETFIELD(NPU3_NTL_MISC_CFG1_NTL_RESET, val, state);
+	npu3_write(npu, NPU3_NTL_MISC_CFG1(dev->index), val);
+}
+
+/* Read back the current fence state from NTL_CQ_FENCE_STATUS. */
+static uint8_t npu3_dev_fence_get(struct npu3_dev *dev)
+{
+	uint64_t val;
+
+	val = npu3_read(dev->npu, NPU3_NTL_CQ_FENCE_STATUS(dev->index));
+	return GETFIELD(NPU3_NTL_CQ_FENCE_STATUS_FIELD, val);
+}
+
+/* Procedure 1.2.1 - Reset NPU/NDL */
+static uint32_t reset_ntl(struct npu3_dev *dev)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t val;
+	int lane;
+
+	set_iovalid(dev, true);
+
+	/* Power on clocks */
+	npu3_phy_write(dev, &NPU3_PHY_RX_CLKDIST_PDWN, 0);
+	npu3_phy_write(dev, &NPU3_PHY_RX_IREF_PDWN, 1);
+	npu3_phy_write(dev, &NPU3_PHY_TX_CLKDIST_PDWN, 0);
+	npu3_phy_write(dev, &NPU3_PHY_RX_CTL_DATASM_CLKDIST_PDWN, 0);
+
+	npu3_for_each_lane(lane, dev) {
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_ANA_PDWN, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_DIG_PDWN, lane, 0);
+		npu3_phy_write_lane(dev, &NPU3_PHY_TX_LANE_PDWN, lane, 0);
+	}
+
+	/* Write PRI */
+	val = SETFIELD(NPU3_NTL_PRI_CFG_NDL, 0ull, dev->index);
+	npu3_write(npu, NPU3_NTL_PRI_CFG(dev->index), val);
+
+	/* Disable RX parity checking */
+	val = npu3_read(npu, NPU3_NTL_MISC_CFG2(dev->index));
+	val &= ~NPU3_NTL_MISC_CFG2_NDL_RX_PARITY_ENA;
+	npu3_write(npu, NPU3_NTL_MISC_CFG2(dev->index), val);
+
+	if (dev->type == NPU3_DEV_TYPE_NVLINK)
+		npu3_pvd_flag_clear(dev, NPU3_DEV_DL_RESET);
+
+	npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_FULL);
+
+	return NPU3_PROC_NEXT;
+}
+
+/*
+ * Once the full fence is reported: pulse the DLPL RX/MISC resets, set the
+ * DLPL config, clear the FIR registers and request the half fence.
+ */
+static uint32_t reset_ndl(struct npu3_dev *dev)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t reg;
+	uint32_t val32;
+
+	if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_FULL)
+		return NPU3_PROC_INPROGRESS;
+
+	reg = NPU3_DLPL_CTL(dev->index);
+	val32 = npu3_read_4b(npu, reg);
+	val32 |= NPU3_DLPL_CTL_RESET_RX | NPU3_DLPL_CTL_RESET_MISC;
+	npu3_write_4b(npu, reg, val32);
+
+	val32 = npu3_read_4b(npu, reg);
+	val32 &= ~(NPU3_DLPL_CTL_RESET_RX | NPU3_DLPL_CTL_RESET_MISC);
+	npu3_write_4b(npu, reg, val32);
+
+	reg = NPU3_DLPL_CFG(dev->index);
+	val32 = NPU3_DLPL_CFG_PRI_BYTESWAP;
+	npu3_write_4b(npu, reg, val32);
+
+	/* Clear FIR bits */
+	for (uint32_t i = 0; i < NPU3_FIR_MAX; i++)
+		xscom_write(npu->chip_id, npu->xscom_base + NPU3_FIR(i), 0ull);
+
+	npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_HALF);
+
+	return NPU3_PROC_NEXT;
+}
+
+/*
+ * Once the half fence is reported: program the send/receive credit
+ * registers for this link and request that the fence be dropped.
+ */
+static uint32_t reset_ntl_release(struct npu3_dev *dev)
+{
+	struct npu3 *npu = dev->npu;
+	uint32_t i = dev->index;
+
+	if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_HALF)
+		return NPU3_PROC_INPROGRESS;
+
+	/* Credit setup */
+	npu3_write(npu, NPU3_NTL_CREQ_HDR_CRED_SND(i), 0x0200000000000000);
+	npu3_write(npu, NPU3_NTL_PRB_HDR_CRED_SND(i), 0x0200000000000000);
+	npu3_write(npu, NPU3_NTL_ATR_HDR_CRED_SND(i), 0x0200000000000000);
+	npu3_write(npu, NPU3_NTL_RSP_HDR_CRED_SND(i), 0x0200000000000000);
+	npu3_write(npu, NPU3_NTL_CREQ_DAT_CRED_SND(i), 0x1000000000000000);
+	npu3_write(npu, NPU3_NTL_RSP_DAT_CRED_SND(i), 0x1000000000000000);
+
+	npu3_write(npu, NPU3_NTL_CREQ_HDR_CRED_RCV(i), 0x0000be0000000000);
+	npu3_write(npu, NPU3_NTL_DGD_HDR_CRED_RCV(i), 0x0000640000000000);
+	npu3_write(npu, NPU3_NTL_ATSD_HDR_CRED_RCV(i), 0x0000200000000000);
+	npu3_write(npu, NPU3_NTL_RSP_HDR_CRED_RCV(i), 0x0000be0000000000);
+	npu3_write(npu, NPU3_NTL_CREQ_DAT_CRED_RCV(i), 0x0001000000000000);
+	npu3_write(npu, NPU3_NTL_RSP_DAT_CRED_RCV(i), 0x0001000000000000);
+
+	npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_NONE);
+
+	return NPU3_PROC_NEXT;
+}
+
+/*
+ * Once no fence is reported: re-enable RX parity checking (disabled in
+ * reset_ntl()) and, for NVLink devices, set the DL_RESET flag.
+ */
+static uint32_t reset_ntl_finish(struct npu3_dev *dev)
+{
+	struct npu3 *npu = dev->npu;
+	uint64_t val;
+
+	if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_NONE)
+		return NPU3_PROC_INPROGRESS;
+
+	/* Enable RX parity checking */
+	val = npu3_read(npu, NPU3_NTL_MISC_CFG2(dev->index));
+	val |= NPU3_NTL_MISC_CFG2_NDL_RX_PARITY_ENA;
+	npu3_write(npu, NPU3_NTL_MISC_CFG2(dev->index), val);
+
+	if (dev->type == NPU3_DEV_TYPE_NVLINK)
+		npu3_pvd_flag_set(dev, NPU3_DEV_DL_RESET);
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(reset_ntl, reset_ndl, reset_ntl_release, reset_ntl_finish);
+
+/*
+ * Read @reg and compare it with @expected.
+ * Returns 0 on a match; otherwise logs both values and returns 1.
+ */
+static int npu3_dev_regcmp(struct npu3_dev *dev, uint64_t reg,
+			   const char *reg_name, uint64_t expected)
+{
+	uint64_t val;
+
+	val = npu3_read(dev->npu, reg);
+	if (val == expected)
+		return 0;
+
+	NPU3DEVERR(dev, "%s: expected 0x%llx, read 0x%llx\n",
+		   reg_name, expected, val);
+
+	return 1;
+}
+
+/* Relies on a variable named "dev" being in scope at the call site. */
+#define REGCMP(reg, expected) \
+	npu3_dev_regcmp(dev, reg(dev->index), #reg, expected)
+
+/*
+ * Compare each receive-credit register against a fixed expected value.
+ * Every register is checked (and every mismatch logged) before failing.
+ */
+static uint32_t check_credits(struct npu3_dev *dev)
+{
+	/* Use bitwise OR to prevent short-circuit evaluation */
+	if (REGCMP(NPU3_NTL_CREQ_HDR_CRED_RCV, 0x0be0be0000000000ull) |
+	    REGCMP(NPU3_NTL_DGD_HDR_CRED_RCV, 0x0640640000000000ull) |
+	    REGCMP(NPU3_NTL_ATSD_HDR_CRED_RCV, 0x0200200000000000ull) |
+	    REGCMP(NPU3_NTL_RSP_HDR_CRED_RCV, 0x0be0be0000000000ull) |
+	    REGCMP(NPU3_NTL_CREQ_DAT_CRED_RCV, 0x1001000000000000ull) |
+	    REGCMP(NPU3_NTL_RSP_DAT_CRED_RCV, 0x1001000000000000ull))
+		return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
+
+	return NPU3_PROC_COMPLETE;
+}
+
+DEFINE_PROCEDURE(check_credits);
+
+/*
+ * Procedure number -> procedure. Entries 2 and 3 are left NULL and are
+ * rejected as unsupported by npu3_dev_procedure_init().
+ */
+static struct procedure *procedures[] = {
+	[0] = &procedure_stop,
+	[1] = &procedure_nop,
+	[4] = &procedure_phy_reset,
+	[5] = &procedure_phy_tx_zcal,
+	[6] = &procedure_phy_rx_dccal,
+	[7] = &procedure_phy_enable_tx_rxcal,
+	[8] = &procedure_phy_disable_tx_rxcal,
+	[9] = &procedure_phy_rx_training,
+	[10] = &procedure_reset_ntl,
+	[11] = &procedure_nop, /* Placeholder for pre-terminate */
+	[12] = &procedure_nop, /* Placeholder for terminate */
+	[13] = &procedure_check_credits,
+};
+
+/*
+ * Select procedure @pnum for @dev and reset its progress.
+ *
+ * An out-of-range or unpopulated number is logged and recorded as
+ * COMPLETE | UNSUPPORTED. Otherwise the status becomes INPROGRESS, the
+ * step index is reset to 0 and a one second timeout is armed. No step is
+ * executed here; that happens from npu3_dev_procedure_status().
+ */
+void npu3_dev_procedure_init(struct npu3_dev *dev, uint32_t pnum)
+{
+	struct npu3_procedure *proc = &dev->proc;
+	const char *name;
+
+	if (pnum >= ARRAY_SIZE(procedures) || !procedures[pnum]) {
+		NPU3DEVERR(dev, "Unsupported procedure number %d\n", pnum);
+		proc->status = NPU3_PROC_COMPLETE | NPU3_PROC_UNSUPPORTED;
+		return;
+	}
+
+	name = procedures[pnum]->name;
+
+	/* Same procedure requested again before it completed. */
+	if (proc->number == pnum && !(proc->status & NPU3_PROC_COMPLETE))
+		NPU3DEVINF(dev, "Restarting procedure %s\n", name);
+	else
+		NPU3DEVINF(dev, "Starting procedure %s\n", name);
+
+	proc->status = NPU3_PROC_INPROGRESS;
+	proc->number = pnum;
+	proc->step = 0;
+	proc->timeout = mftb() + msecs_to_tb(1000);
+}
+
+/*
+ * Run the current step once. If it returns NPU3_PROC_NEXT, advance the
+ * step index and log the new step. Returns the step's raw result.
+ */
+static uint32_t npu3_dev_procedure_run_step(struct npu3_dev *dev)
+{
+	struct npu3_procedure *proc = &dev->proc;
+	uint32_t result;
+
+	result = procedures[proc->number]->steps[proc->step](dev);
+	if (result & NPU3_PROC_NEXT) {
+		proc->step++;
+
+		NPU3DEVINF(dev, "Running procedure %s step %d\n",
+			   procedures[proc->number]->name, proc->step);
+	}
+
+	return result;
+}
+
+/*
+ * Run steps for as long as they ask for the next one, then record the
+ * outcome in proc->status. A procedure that is neither complete nor
+ * advancing once the timeout has passed is recorded as
+ * COMPLETE | FAILED.
+ */
+static void npu3_dev_procedure_run(struct npu3_dev *dev)
+{
+	struct npu3_procedure *proc = &dev->proc;
+	const char *name;
+	uint32_t result;
+
+	do {
+		result = npu3_dev_procedure_run_step(dev);
+	} while (result & NPU3_PROC_NEXT);
+
+	name = procedures[proc->number]->name;
+
+	if (result & NPU3_PROC_COMPLETE) {
+		NPU3DEVINF(dev, "Procedure %s complete\n", name);
+	} else if (tb_compare(mftb(), proc->timeout) == TB_AAFTERB) {
+		NPU3DEVINF(dev, "Procedure %s timed out\n", name);
+		result = NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
+	}
+
+	/* Mask off internal state bits */
+	proc->status = result & NPU3_PROC_STATUS_MASK;
+}
+
+/*
+ * Return the status of the device's current procedure, first giving it a
+ * chance to make progress if it has not completed yet.
+ */
+uint32_t npu3_dev_procedure_status(struct npu3_dev *dev)
+{
+	/* Run the procedure if not already complete */
+	if (!(dev->proc.status & NPU3_PROC_COMPLETE))
+		npu3_dev_procedure_run(dev);
+
+	return dev->proc.status;
+}
+
+/*
+ * Run the first reset step (reset_ntl) directly and busy-wait up to one
+ * second for the full fence to be reported.
+ *
+ * Returns OPAL_SUCCESS once fenced, or OPAL_BUSY on timeout.
+ */
+int64_t npu3_dev_reset(struct npu3_dev *dev)
+{
+	unsigned long timeout;
+
+	reset_ntl(dev);
+	timeout = mftb() + msecs_to_tb(1000);
+
+	while (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_FULL) {
+		if (tb_compare(mftb(), timeout) == TB_AAFTERB) {
+			NPU3DEVINF(dev, "Device reset timed out\n");
+			return OPAL_BUSY;
+		}
+	}
+
+	return OPAL_SUCCESS;
+}
diff --git a/hw/npu3-nvlink.c b/hw/npu3-nvlink.c
new file mode 100644
index 0000000..edc4aaa
--- /dev/null
+++ b/hw/npu3-nvlink.c
@@ -0,0 +1,1830 @@
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <device.h>
+#include <phys-map.h>
+#include <npu3.h>
+#include <npu3-regs.h>
+#include <pci-virt.h>
+#include <xscom.h>
+#include <xscom-p9-regs.h>
+#include <interrupts.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <cache-p9.h>
+
+/* Log with an "NPU#<opal_id>[chip_id:npu index]" prefix. */
+#define NPU3LOG(l, npu, fmt, a...)			\
+	prlog(l, "NPU#%04x[%d:%d]: " fmt,		\
+	      (npu)->nvlink.phb.opal_id,		\
+	      (npu)->chip_id,				\
+	      (npu)->index, ##a)
+#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a)
+#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a)
+#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a)
+
+/*
+ * Log with an "NPU#<opal_id>:bus:dev.fn" prefix decoded from the virtual
+ * PCI device's bdfn (bits 15:8, 7:3 and 2:0 respectively).
+ */
+#define NPU3DEVLOG(l, dev, fmt, a...)			\
+	prlog(l, "NPU#%04x:%02x:%02x.%x " fmt,		\
+	      (dev)->npu->nvlink.phb.opal_id,		\
+	      (dev)->nvlink.pvd->bdfn >> 8 & 0xff,	\
+	      (dev)->nvlink.pvd->bdfn >> 3 & 0x1f,	\
+	      (dev)->nvlink.pvd->bdfn & 0x7, ##a)
+#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
+#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
+#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
+
+/*
+ * Generate npu3_cfg_read<size>(): forwards to pci_virt_cfg_read() with the
+ * access width taken from sizeof(*data) and narrows the result to <type>.
+ * NOTE(review): *data is assigned from val even when ret reports an error;
+ * this assumes pci_virt_cfg_read() always stores something in val - confirm.
+ */
+#define NPU3_CFG_READ(size, type)					\
+static int64_t npu3_cfg_read##size(struct phb *phb, uint32_t bdfn,	\
+				   uint32_t offset, type *data)		\
+{									\
+	uint32_t val;							\
+	int64_t ret;							\
+									\
+	ret = pci_virt_cfg_read(phb, bdfn, offset,			\
+				sizeof(*data), &val);			\
+	*data = (type)val;						\
+	return ret;							\
+}
+
+/*
+ * Generate npu3_cfg_write<size>(): forwards to pci_virt_cfg_write() with
+ * the access width taken from sizeof(data).
+ */
+#define NPU3_CFG_WRITE(size, type)					\
+static int64_t npu3_cfg_write##size(struct phb *phb, uint32_t bdfn,	\
+				    uint32_t offset, type data)		\
+{									\
+	uint32_t val = data;						\
+	int64_t ret;							\
+									\
+	ret = pci_virt_cfg_write(phb, bdfn, offset,			\
+				 sizeof(data), val);			\
+	return ret;							\
+}
+
+NPU3_CFG_READ(8, u8);
+NPU3_CFG_READ(16, u16);
+NPU3_CFG_READ(32, u32);
+NPU3_CFG_WRITE(8, u8);
+NPU3_CFG_WRITE(16, u16);
+NPU3_CFG_WRITE(32, u32);
+
+/*
+ * Always reports "not frozen, no error", regardless of the PHB or PE.
+ * @severity is optional and only written when non-NULL.
+ */
+static int64_t npu3_eeh_freeze_status(struct phb *phb __unused,
+				      uint64_t pe_num __unused,
+				      uint8_t *freeze_state,
+				      uint16_t *pci_error_type,
+				      uint16_t *severity)
+{
+	/*
+	 * FIXME: When it's called by skiboot PCI config accessor,
+	 * the PE number is fixed to 0, which is incorrect. We need
+	 * introduce another PHB callback to translate it. For now,
+	 * it keeps the skiboot PCI enumeration going.
+	 */
+	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
+	*pci_error_type = OPAL_EEH_NO_ERROR;
+
+	if (severity)
+		*severity = OPAL_EEH_SEV_NO_ERROR;
+
+	return OPAL_SUCCESS;
+}
+
+/* Number of PEs supported */
+#define NPU3_MAX_PE_NUM		16
+#define NPU3_RESERVED_PE_NUM	15
+
+/*
+ * Zero the TVT: select the table with the auto-increment flag set and
+ * write 0 to the data register once per PE.
+ */
+static int64_t npu3_ioda_reset(struct phb *phb, bool purge __unused)
+{
+	struct npu3 *npu = npu3_phb_to_npu(phb);
+	uint64_t val;
+
+	val = NPU3_ATS_IODA_ADDR_AUTO_INC;
+	val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_SEL, val,
+		       NPU3_ATS_IODA_ADDR_TBL_TVT);
+	npu3_write(npu, NPU3_ATS_IODA_ADDR, val);
+
+	for (uint32_t i = 0; i < NPU3_MAX_PE_NUM; i++)
+		npu3_write(npu, NPU3_ATS_IODA_DATA, 0ull);
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Point the IODA address register at entry @index of @table, without
+ * auto-increment, so the next data register access targets that entry.
+ */
+static inline void npu3_ioda_sel(struct npu3 *npu, uint32_t table,
+				 uint32_t index)
+{
+	uint64_t val;
+
+	val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_SEL, 0ull, table);
+	val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_ADDR, val, index);
+	npu3_write(npu, NPU3_ATS_IODA_ADDR, val);
+}
+
+/*
+ * Program (or disable) the TVE for @pe_num.
+ *
+ * @window_id must equal @pe_num and be below NPU3_MAX_PE_NUM.
+ * @tce_table_size of 0 clears the entry; otherwise it must be a power of
+ * two, at least 0x1000, and is encoded as ilog2(size) - 11.
+ * @tce_levels must be 1..4.
+ * @tce_page_size of 256MB, 16MB or 64KB selects encodings 17, 13 or 5;
+ * any other value is silently given encoding 1 rather than rejected.
+ *
+ * Returns OPAL_SUCCESS, or OPAL_PARAMETER for an invalid argument. The
+ * IODA address register has already been written by the time the table
+ * size and level checks run.
+ */
+static int64_t npu3_map_pe_dma_window(struct phb *phb,
+				      uint64_t pe_num,
+				      uint16_t window_id,
+				      uint16_t tce_levels,
+				      uint64_t tce_table_addr,
+				      uint64_t tce_table_size,
+				      uint64_t tce_page_size)
+{
+	struct npu3 *npu = npu3_phb_to_npu(phb);
+	uint64_t tts_encoded, val;
+	uint32_t page_size;
+
+	/* Each PE has one corresponding TVE */
+	if (window_id != pe_num || pe_num >= NPU3_MAX_PE_NUM)
+		return OPAL_PARAMETER;
+
+	npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num);
+
+	/* TCE table size zero is used to disable the TVE */
+	if (!tce_table_size) {
+		npu3_write(npu, NPU3_ATS_IODA_DATA, 0ull);
+		return OPAL_SUCCESS;
+	}
+
+	/* TCE table size */
+	if (!is_pow2(tce_table_size) || tce_table_size < 0x1000)
+		return OPAL_PARAMETER;
+
+	tts_encoded = ilog2(tce_table_size) - 11;
+	if (tts_encoded > 39)
+		return OPAL_PARAMETER;
+
+	val = SETFIELD(NPU3_ATS_IODA_TVT_TABLE_SIZE, 0ull, tts_encoded);
+
+	/* Number of levels */
+	if (tce_levels < 1 || tce_levels > 4)
+		return OPAL_PARAMETER;
+
+	val = SETFIELD(NPU3_ATS_IODA_TVT_TABLE_LEVEL, val, tce_levels - 1);
+
+	/* TCE page size */
+	switch (tce_page_size) {
+	case 256 << 20:
+		page_size = 17;
+		break;
+	case 16 << 20:
+		page_size = 13;
+		break;
+	case 64 << 10:
+		page_size = 5;
+		break;
+	default:
+		page_size = 1;
+	}
+
+	val = SETFIELD(NPU3_ATS_IODA_TVT_PAGE_SIZE, val, page_size);
+	val = SETFIELD(NPU3_ATS_IODA_TVT_XLAT_ADDR, val, tce_table_addr >> 12);
+	npu3_write(npu, NPU3_ATS_IODA_DATA, val);
+
+	return OPAL_SUCCESS;
+}
+
+static int64_t npu3_map_pe_dma_window_real(struct phb *phb,
+					   uint64_t pe_num,
+					   uint16_t window_id,
+					   uint64_t pci_start_addr __unused,
+					   uint64_t pci_mem_size __unused)
+{
+	struct npu3 *npu = npu3_phb_to_npu(phb);
+	uint64_t val;
+
+	/* Each PE has one corresponding TVE */
+	if (window_id != pe_num || pe_num >= NPU3_MAX_PE_NUM)
+		return OPAL_PARAMETER;
+
+	if (pci_mem_size) {
+		/*
+		 * GPUs need to be able to access the MMIO memory space as well.
+		 * On POWER9 this is above the top of RAM, so disable the TVT
+		 * range check, allowing access to all memory addresses.
+ */ + val = 0; + } else { + /* Disable */ + val = PPC_BIT(51); + } + + npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num); + npu3_write(npu, NPU3_ATS_IODA_DATA, val); + + return OPAL_SUCCESS; +} + +static int64_t npu3_next_error(struct phb *phb, + uint64_t *first_frozen_pe, + uint16_t *pci_error_type, + uint16_t *severity) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + uint64_t val; + uint32_t pe_num; + + if (!first_frozen_pe || !pci_error_type || !severity) + return OPAL_PARAMETER; + + *first_frozen_pe = -1; + *pci_error_type = OPAL_EEH_NO_ERROR; + *severity = OPAL_EEH_SEV_NO_ERROR; + + for (pe_num = 0; pe_num < NPU3_MAX_PE_NUM; pe_num++) { + val = npu3_read(npu, NPU3_MISC_PESTB_DATA(pe_num)); + if (!GETFIELD(NPU3_MISC_PESTB_DATA_DMA_STOPPED_STATE, val)) + continue; + + *first_frozen_pe = pe_num; + *pci_error_type = OPAL_EEH_PE_ERROR; + *severity = OPAL_EEH_SEV_PE_ER; + break; + } + + return OPAL_SUCCESS; +} + +static struct npu3_dev *npu3_bdfn_to_dev(struct npu3 *npu, uint32_t bdfn) +{ + struct pci_virt_device *pvd; + + /* All emulated devices are attached to root bus */ + if (bdfn & ~0xff) + return NULL; + + pvd = pci_virt_find_device(&npu->nvlink.phb, bdfn); + if (pvd) + return pvd->data; + + return NULL; +} + +static int npu3_match_gpu(struct phb *phb __unused, struct pci_device *pd, + void *data) +{ + const char *slot = data; + struct dt_node *dn; + char *loc_code; + + /* Ignore non-NVIDIA devices */ + if (PCI_VENDOR_ID(pd->vdid) != 0x10de) + return 0; + + /* Find the PCI device's slot location */ + for (dn = pd->dn; + dn && !dt_find_property(dn, "ibm,loc-code"); + dn = dn->parent); + + if (!dn) + return 0; + + loc_code = (char *)dt_prop_get(dn, "ibm,loc-code"); + if (streq(loc_code, slot)) + return 1; + + return 0; +} + +static void npu3_dev_find_gpu(struct npu3_dev *dev) +{ + const char *slot = dev->nvlink.loc_code; + struct phb *phb; + struct pci_device *gpu; + + if (!slot) + return; + + for_each_phb(phb) { + gpu = pci_walk_dev(phb, NULL, 
npu3_match_gpu, (void *)slot); + if (!gpu) + continue; + + dev->nvlink.gpu = gpu; + return; + } + + NPU3DEVINF(dev, "No PCI device found for slot '%s'\n", slot); +} + +#define VENDOR_CAP_START 0x80 +#define VENDOR_CAP_LINK_FLAG_OFFSET 0x0d + +void npu3_pvd_flag_set(struct npu3_dev *dev, uint8_t flag) +{ + uint32_t offset = VENDOR_CAP_START + VENDOR_CAP_LINK_FLAG_OFFSET; + uint32_t flags; + + PCI_VIRT_CFG_RDONLY_RD(dev->nvlink.pvd, offset, 1, &flags); + flags |= flag; + PCI_VIRT_CFG_INIT_RO(dev->nvlink.pvd, offset, 1, flags); +} + +void npu3_pvd_flag_clear(struct npu3_dev *dev, uint8_t flag) +{ + uint32_t offset = VENDOR_CAP_START + VENDOR_CAP_LINK_FLAG_OFFSET; + uint32_t flags; + + PCI_VIRT_CFG_RDONLY_RD(dev->nvlink.pvd, offset, 1, &flags); + flags &= ~flag; + PCI_VIRT_CFG_INIT_RO(dev->nvlink.pvd, offset, 1, flags); +} + +static struct lock npu3_phandle_lock = LOCK_UNLOCKED; + +static void npu3_append_phandle(struct dt_node *dn, const char *name, + uint32_t phandle) +{ + struct dt_property *prop; + uint32_t *phandles; + size_t len; + + prop = __dt_find_property(dn, name); + if (!prop) { + dt_add_property_cells(dn, name, phandle); + return; + } + + /* + * Make sure no one else has a reference to the property. Assume + * this is the only function that holds a reference to it. 
+ */ + lock(&npu3_phandle_lock); + + /* Need to append to the property */ + len = prop->len + sizeof(*phandles); + dt_resize_property(&prop, len); + prop->len = len; + + phandles = (uint32_t *)prop->prop; + phandles[len / sizeof(*phandles) - 1] = phandle; + + unlock(&npu3_phandle_lock); +} + +static void npu3_dev_fixup_dt(struct npu3_dev *dev) +{ + struct pci_device *pd = dev->nvlink.pd; + struct pci_device *gpu = dev->nvlink.gpu; + + dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dn->phandle); + dt_add_property_string(pd->dn, "ibm,loc-code", dev->nvlink.loc_code); + if (dev->link_speed != 0xff) + dt_add_property_cells(pd->dn, "ibm,nvlink-speed", + lo32(dev->link_speed)); + + if (!gpu) + return; + + npu3_append_phandle(gpu->dn, "ibm,npu", pd->dn->phandle); + dt_add_property_cells(pd->dn, "ibm,gpu", gpu->dn->phandle); +} + +static int64_t npu3_gpu_bridge_sec_bus_reset(void *pdev, + struct pci_cfg_reg_filter *pcrf __unused, + uint32_t offset, uint32_t len, + uint32_t *data, bool write) +{ + struct pci_device *pd = pdev; + struct pci_device *gpu; + struct npu3 *npu; + struct npu3_dev *dev; + bool purge = false; + + if (!write) + return OPAL_PARAMETER; + + if (len != 2 || offset & 1) { + PCIERR(pd->phb, pd->bdfn, + "Unsupported write to bridge control register\n"); + return OPAL_PARAMETER; + } + + if (!(*data & PCI_CFG_BRCTL_SECONDARY_RESET)) + return OPAL_PARTIAL; + + gpu = list_top(&pd->children, struct pci_device, link); + if (!gpu) + return OPAL_PARTIAL; + + npu3_for_each_nvlink_npu(npu) + npu3_for_each_nvlink_dev(dev, npu) + if (dev->nvlink.gpu == gpu) + if (!npu3_dev_reset(dev)) + purge = true; + + if (purge) + purge_l2_l3_caches(); + + return OPAL_PARTIAL; +} + +static int npu3_dev_bind(struct phb *phb, struct pci_device *pd, + void *data __unused) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + struct npu3_dev *dev = npu3_bdfn_to_dev(npu, pd->bdfn); + struct pci_device *gpu; + + dev->nvlink.pd = pd; + + /* The slot label indicates which GPU this link is 
connected to */ + dev->nvlink.loc_code = dt_prop_get_def(dev->dn, "ibm,slot-label", NULL); + if (!dev->nvlink.loc_code) { + /** + * @fwts-label NPUNoPHBSlotLabel + * @fwts-advice No GPU/NPU slot information was found. + * NVLink3 functionality will not work. + */ + NPU3DEVERR(dev, "Cannot find GPU slot information\n"); + } + + npu3_dev_find_gpu(dev); + npu3_dev_fixup_dt(dev); + + gpu = dev->nvlink.gpu; + if (!gpu) + return 0; + + /* When a GPU is reset, ensure all of its links are reset too */ + if (gpu->parent && gpu->parent->slot) + pci_add_cfg_reg_filter(gpu->parent, PCI_CFG_BRCTL, 2, + PCI_REG_FLAG_WRITE, + npu3_gpu_bridge_sec_bus_reset); + + npu3_pvd_flag_set(dev, NPU3_DEV_PCI_LINKED); + + return 0; +} + +struct npu3 *npu3_next_nvlink_npu(struct npu3 *npu, uint32_t chip_id) +{ + uint64_t phb_id = 0; + struct phb *phb; + + if (npu) + phb_id = npu->nvlink.phb.opal_id + 1; + + for (; (phb = __pci_next_phb_idx(&phb_id));) { + if (phb->phb_type != phb_type_npu_v3) + continue; + + npu = npu3_phb_to_npu(phb); + if (npu->chip_id == chip_id || chip_id == NPU3_ANY_CHIP) + return npu; + } + + return NULL; +} + +static struct npu3 *npu3_last_npu(void) +{ + static struct npu3 *last = NULL; + struct npu3 *npu; + + if (last) + return last; + + npu3_for_each_nvlink_npu(npu) + last = npu; + + return last; +} + +static uint32_t npu3_gpu_links(struct pci_device *gpu) +{ + const struct dt_property *prop; + + if (!gpu) + return 0; + + /* The link count is the number of phandles in "ibm,npu" */ + prop = dt_find_property(gpu->dn, "ibm,npu"); + if (!prop) + return 0; + + return prop->len / sizeof(uint32_t); +} + +static uint32_t npu3_links_per_gpu(void) +{ + struct npu3 *npu; + struct npu3_dev *dev; + uint32_t links = 0; + + /* Use the first GPU we find to figure this out */ + npu3_for_each_nvlink_npu(npu) { + npu3_for_each_nvlink_dev(dev, npu) { + links = npu3_gpu_links(dev->nvlink.gpu); + if (links) + goto out; + } + } + +out: + prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, links); + 
+ return links; +} + +int32_t npu3_dev_gpu_index(struct npu3_dev *dev) +{ + const char *slot; + char *p = NULL; + int ret; + + slot = dev->nvlink.loc_code; + if (!slot) + return -1; + + if (memcmp(slot, "GPU", 3)) + return -1; + + ret = strtol(slot + 3, &p, 10); + if (*p || p == slot + 3) + return -1; + + return ret; +} + +static uint32_t npu3_chip_possible_gpu_links(void) +{ + struct proc_chip *chip; + struct npu3 *npu; + struct npu3_dev *dev; + uint32_t possible = 0; + + for_each_chip(chip) { + npu3_for_each_chip_nvlink_npu(npu, chip->id) + npu3_for_each_nvlink_dev(dev, npu) + if (npu3_dev_gpu_index(dev) != -1) + possible++; + + if (possible) + break; + } + + prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, possible); + + return possible; +} + +uint32_t npu3_chip_possible_gpus(void) +{ + static uint32_t possible = -1; + uint32_t links_per_gpu; + + /* Static value, same for all chips; only do this once */ + if (possible != -1) + return possible; + + possible = 0; + + links_per_gpu = npu3_links_per_gpu(); + if (links_per_gpu) + possible = npu3_chip_possible_gpu_links() / links_per_gpu; + + prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, possible); + + return possible; +} + +static void npu3_dev_assign_gmb(struct npu3_dev *dev, uint64_t addr, + uint64_t size) +{ + uint32_t mode; + uint64_t val; + + switch (npu3_gpu_links(dev->nvlink.gpu)) { + case 0: + return; + case 1: + mode = 0; + break; + case 2: + mode = 1; + break; + case 3: + mode = 3; + break; + case 4: + mode = 6; + break; + case 6: + mode = 10; + break; + default: + /* Hardware does not support this configuration */ + assert(0); + } + + mode += dev->nvlink.pvd->bdfn & 0x7; + + val = NPU3_GPU_MEM_BAR_ENABLE | + NPU3_GPU_MEM_BAR_POISON; + val = SETFIELD(NPU3_GPU_MEM_BAR_ADDR, val, addr >> 30); + val = SETFIELD(NPU3_GPU_MEM_BAR_SIZE, val, size >> 30); + val = SETFIELD(NPU3_GPU_MEM_BAR_MODE, val, mode); + + npu3_write(dev->npu, NPU3_GPU_MEM_BAR(dev->index), val); +} + +static struct dt_node *npu3_create_memory_dn(struct 
npu3_dev *dev, + uint32_t gpu_index, uint64_t addr, + uint64_t size) +{ + uint32_t nid = 255 - gpu_index; + struct dt_node *mem; + + mem = dt_find_by_name_addr(dt_root, "memory", addr); + if (mem) + return mem; + + mem = dt_new_addr(dt_root, "memory", addr); + assert(mem); + + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property_string(mem, "compatible", "ibm,coherent-device-memory"); + dt_add_property_u64s(mem, "reg", addr, size); + dt_add_property_u64s(mem, "linux,usable-memory", addr, 0); + dt_add_property_cells(mem, "ibm,chip-id", nid); + dt_add_property_cells(mem, "ibm,associativity", 4, nid, nid, nid, nid); + + NPU3INF(dev->npu, "%s mem: 0x%016llx (nid %d)\n", dev->nvlink.loc_code, + addr, nid); + + return mem; +} + +static void npu3_dev_init_gpu_mem(struct npu3_dev *dev) +{ + struct pci_device *pd = dev->nvlink.pd; + struct npu3 *npu = dev->npu; + struct dt_node *mem; + uint64_t addr, size, gta; + uint32_t gpu_index; + + if (!dev->nvlink.gpu) + return; + + gpu_index = npu3_dev_gpu_index(dev) % npu3_chip_possible_gpus(); + phys_map_get(npu->chip_id, GPU_MEM_4T_DOWN, gpu_index, &addr, &size); + + npu3_dev_assign_gmb(dev, addr, size); + mem = npu3_create_memory_dn(dev, gpu_index, addr, size); + + /* + * Coral mode address compression. This is documented in Figure 3.5 of + * the NPU workbook; "P9->GPU RA Compression (Coral)". 
+ */ + gta = (addr >> 42 & 0x1) << 42; + gta |= (addr >> 45 & 0x3) << 43; + gta |= (addr >> 49 & 0x3) << 45; + gta |= addr & ((1ul << 43) - 1); + + dt_add_property_cells(pd->dn, "memory-region", mem->phandle); + dt_add_property_u64s(pd->dn, "ibm,device-tgt-addr", gta); +} + +static void npu3_final_fixup(void) +{ + struct npu3 *npu; + struct npu3_dev *dev; + + npu3_for_each_nvlink_npu(npu) + npu3_for_each_nvlink_dev(dev, npu) + npu3_dev_init_gpu_mem(dev); +} + +static void npu3_phb_final_fixup(struct phb *phb) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + + pci_walk_dev(phb, NULL, npu3_dev_bind, NULL); + + /* + * After every npu's devices are bound, do gpu-related fixup. This + * counts on npu3_last_npu() walking the phbs in the same order as + * the PHB final fixup loop in __pci_init_slots(). + */ + if (npu == npu3_last_npu()) + npu3_final_fixup(); +} + +static int64_t npu3_set_pe(struct phb *phb, + uint64_t pe_num, + uint64_t bdfn, + uint8_t bcompare, + uint8_t dcompare, + uint8_t fcompare, + uint8_t action) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + struct npu3_dev *dev; + uint64_t val; + + dev = npu3_bdfn_to_dev(npu, bdfn); + if (!dev) + return OPAL_PARAMETER; + + if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE) + return OPAL_PARAMETER; + + if (pe_num >= NPU3_MAX_PE_NUM) + return OPAL_PARAMETER; + + if (bcompare != OpalPciBusAll || + dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER || + fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER) + return OPAL_UNSUPPORTED; + + if (!dev->nvlink.gpu) + return OPAL_SUCCESS; + + val = NPU3_CTL_BDF2PE_CFG_ENABLE; + val = SETFIELD(NPU3_CTL_BDF2PE_CFG_PE, val, pe_num); + val = SETFIELD(NPU3_CTL_BDF2PE_CFG_BDF, val, dev->nvlink.gpu->bdfn); + npu3_write(npu, NPU3_CTL_BDF2PE_CFG(pe_num), val); + + val = NPU3_MISC_BDF2PE_CFG_ENABLE; + val = SETFIELD(NPU3_MISC_BDF2PE_CFG_PE, val, pe_num); + val = SETFIELD(NPU3_MISC_BDF2PE_CFG_BDF, val, dev->nvlink.gpu->bdfn); + npu3_write(npu, NPU3_MISC_BDF2PE_CFG(pe_num), val); + + return 
OPAL_SUCCESS; +} + +static int64_t npu3_tce_kill_pages(struct npu3 *npu, + uint64_t pe_num, + uint32_t tce_size, + uint64_t dma_addr, + uint32_t npages) +{ + uint32_t check_tce_size; + uint64_t val; + + if (pe_num >= NPU3_MAX_PE_NUM) + return OPAL_PARAMETER; + + npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num); + val = npu3_read(npu, NPU3_ATS_IODA_DATA); + + check_tce_size = 0x800 << GETFIELD(NPU3_ATS_IODA_TVT_PAGE_SIZE, val); + if (check_tce_size != tce_size) { + NPU3ERR(npu, "%s: Unexpected TCE size (got 0x%x, expected 0x%x)\n", + __func__, tce_size, check_tce_size); + + return OPAL_PARAMETER; + } + + val = NPU3_ATS_TCE_KILL_ONE; + val = SETFIELD(NPU3_ATS_TCE_KILL_PE_NUMBER, val, pe_num); + + while (npages--) { + val = SETFIELD(NPU3_ATS_TCE_KILL_ADDRESS, val, dma_addr >> 12); + npu3_write(npu, NPU3_ATS_TCE_KILL, val); + + dma_addr += tce_size; + } + + return OPAL_SUCCESS; +} + +static int64_t npu3_tce_kill(struct phb *phb, + uint32_t kill_type, + uint64_t pe_num, + uint32_t tce_size, + uint64_t dma_addr, + uint32_t npages) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + + sync(); + + switch(kill_type) { + case OPAL_PCI_TCE_KILL_PAGES: + return npu3_tce_kill_pages(npu, pe_num, tce_size, + dma_addr, npages); + case OPAL_PCI_TCE_KILL_PE: + /* + * NPU doesn't support killing a PE so fall through + * and do a kill all instead. 
+ */ + case OPAL_PCI_TCE_KILL_ALL: + npu3_write(npu, NPU3_ATS_TCE_KILL, NPU3_ATS_TCE_KILL_ALL); + return OPAL_SUCCESS; + } + + return OPAL_PARAMETER; +} + +static const struct phb_ops npu_ops = { + .cfg_read8 = npu3_cfg_read8, + .cfg_read16 = npu3_cfg_read16, + .cfg_read32 = npu3_cfg_read32, + .cfg_write8 = npu3_cfg_write8, + .cfg_write16 = npu3_cfg_write16, + .cfg_write32 = npu3_cfg_write32, + .eeh_freeze_status = npu3_eeh_freeze_status, + .ioda_reset = npu3_ioda_reset, + .map_pe_dma_window = npu3_map_pe_dma_window, + .map_pe_dma_window_real = npu3_map_pe_dma_window_real, + .next_error = npu3_next_error, + .phb_final_fixup = npu3_phb_final_fixup, + .set_pe = npu3_set_pe, + .tce_kill = npu3_tce_kill, +}; + +static int64_t npu3_reset(struct pci_slot *slot) +{ + struct npu3 *npu = npu3_phb_to_npu(slot->phb); + struct npu3_dev *dev; + int64_t rc = OPAL_SUCCESS; + bool purge = false; + + npu3_for_each_nvlink_dev(dev, npu) { + rc = npu3_dev_reset(dev); + if (rc) + break; + + purge = true; + } + + /* No devices reset; don't purge, just return */ + if (!purge) + return rc; + + /* All devices reset */ + if (!rc) + return purge_l2_l3_caches(); + + /* Some devices successfully reset; purge, but still return error */ + purge_l2_l3_caches(); + return rc; +} + +static int64_t npu3_freset(struct pci_slot *slot __unused) +{ + return OPAL_SUCCESS; +} + +static int64_t npu3_get_link_state(struct pci_slot *slot __unused, + uint8_t *val) +{ + *val = OPAL_SHPC_LINK_UP_x1; + return OPAL_SUCCESS; +} + +static int64_t npu3_get_power_state(struct pci_slot *slot __unused, + uint8_t *val) +{ + *val = PCI_SLOT_POWER_ON; + return OPAL_SUCCESS; +} + +static void npu3_create_phb_slot(struct npu3 *npu) +{ + struct pci_slot *slot; + + slot = pci_slot_alloc(&npu->nvlink.phb, NULL); + if (!slot) + return; + + /* Elementary functions */ + slot->ops.creset = npu3_reset; + slot->ops.freset = npu3_freset; + slot->ops.hreset = npu3_reset; + slot->ops.get_link_state = npu3_get_link_state; + 
slot->ops.get_power_state = npu3_get_power_state; +} + +static void npu3_create_phb(struct npu3 *npu) +{ + struct phb *phb = &npu->nvlink.phb; + + phb->phb_type = phb_type_npu_v3; + phb->ops = &npu_ops; + phb->dt_node = dt_new_addr(dt_root, "pciex", npu->regs[0]); + assert(phb->dt_node); + + list_head_init(&phb->virt_devices); + pci_register_phb(phb, OPAL_DYNAMIC_PHB_ID); + npu3_create_phb_slot(npu); + npu3_ioda_reset(phb, true); +} + +static void npu3_dev_init_hw(struct npu3_dev *dev) +{ + struct npu3 *npu = dev->npu; + uint64_t reg, val; + + reg = NPU3_RELAXED_CFG2(dev->index); + val = npu3_read(npu, reg); + val |= NPU3_RELAXED_CFG2_CMD_CL_DMA_W | + NPU3_RELAXED_CFG2_CMD_CL_DMA_W_HP | + NPU3_RELAXED_CFG2_CMD_CL_DMA_INJ | + NPU3_RELAXED_CFG2_CMD_PR_DMA_INJ | + NPU3_RELAXED_CFG2_CMD_DMA_PR_W | + NPU3_RELAXED_CFG2_CMD_CL_RD_NC_F0 | + NPU3_RELAXED_CFG2_SRC_RDENA(0); + npu3_write(npu, reg, val); + + reg = NPU3_NTL_MISC_CFG2(dev->index); + val = npu3_read(npu, reg); + val |= NPU3_NTL_MISC_CFG2_BRICK_ENABLE | + NPU3_NTL_MISC_CFG2_NDL_TX_PARITY_ENA | + NPU3_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA | + NPU3_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA; + npu3_write(npu, reg, val); +} + +static void npu3_init_hw(struct npu3 *npu) +{ + struct npu3_dev *dev; + uint64_t reg, val; + + reg = NPU3_XTS_CFG; + val = npu3_read(npu, reg); + val |= NPU3_XTS_CFG_MMIOSD | NPU3_XTS_CFG_TRY_ATR_RO; + npu3_write(npu, reg, val); + + reg = NPU3_XTS_CFG2; + val = npu3_read(npu, reg); + val |= NPU3_XTS_CFG2_NO_FLUSH_ENA; + npu3_write(npu, reg, val); + + reg = NPU3_RELAXED_SRC(0); + val = NPU3_RELAXED_SRC_MASK_NPU; + npu3_write(npu, reg, val); + + npu3_for_each_nvlink_dev(dev, npu) + npu3_dev_init_hw(dev); +} + +/* PCI command register (BAR enable/disable) */ +static int64_t npu3_cfg_cmd(void *pvd, + struct pci_cfg_reg_filter *pcrf __unused, + uint32_t offset, uint32_t size, + uint32_t *data, bool write) +{ + struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data; + + if (!write) + return OPAL_PARTIAL; 
+ + if (offset != PCI_CFG_CMD) + return OPAL_PARAMETER; + + if (size != 1 && size != 2 && size != 4) + return OPAL_PARAMETER; + + npu3_dev_enable_bars(dev, !!(*data & PCI_CFG_CMD_MEM_EN)); + + return OPAL_PARTIAL; +} + +static int64_t npu3_cfg_bar_write(struct npu3_bar *bar, uint64_t mask, + uint32_t data) +{ + if (data != 0xffffffff) + return OPAL_HARDWARE; + + /* Return BAR size on next read */ + bar->trap |= mask; + + return OPAL_SUCCESS; +} + +static int64_t npu3_cfg_bar_read(struct npu3_bar *bar, uint64_t mask, + uint32_t *data) +{ + if (!(bar->trap & mask)) + return OPAL_PARTIAL; + + *data = GETFIELD(mask, bar->size); + bar->trap &= ~mask; + + return OPAL_SUCCESS; +} + +/* PCI BAR registers (NTL/GENID) */ +static int64_t npu3_cfg_bar(void *pvd __unused, + struct pci_cfg_reg_filter *pcrf, + uint32_t offset, uint32_t size, uint32_t *data, + bool write) +{ + struct npu3_bar *bar = (struct npu3_bar *)pcrf->data; + uint64_t mask; + + if (size != 4) + return OPAL_PARAMETER; + + if (offset == pcrf->start) + mask = 0xffffffff; + else if (offset == pcrf->start + 4) + mask = 0xffffffffull << 32; + else + return OPAL_PARAMETER; + + if (write) + return npu3_cfg_bar_write(bar, mask, *data); + + return npu3_cfg_bar_read(bar, mask, data); +} + +/* PCI control register */ +static int64_t npu3_cfg_devctl(void *pvd, + struct pci_cfg_reg_filter *pcrf __unused, + uint32_t offset, uint32_t size, + uint32_t *data, bool write) +{ + struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data; + + if (!write) + return OPAL_HARDWARE; + + if (size != 2 || offset & 1) { + NPU3DEVERR(dev, "Unsupported write to pcie control register\n"); + return OPAL_PARAMETER; + } + + if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET) + if (!npu3_dev_reset(dev)) + purge_l2_l3_caches(); + + return OPAL_PARTIAL; +} + +static uint32_t npu3_cfg_populate_pcie_cap(struct npu3_dev *dev, uint32_t start, + uint32_t prev_cap) +{ + struct pci_virt_device *pvd = dev->nvlink.pvd; + uint32_t val; + + /* Add capability list 
*/ + PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start); + PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_EXP); + + /* 0x00 - ID/PCIE capability */ + val = PCI_CFG_CAP_ID_EXP; + val |= 0x2 << 16 | PCIE_TYPE_ENDPOINT << 20; + PCI_VIRT_CFG_INIT_RO(pvd, start, 4, val); + + /* 0x04 - Device capability */ + val = PCIE_MPSS_128 | + PCIE_PHANTOM_NONE << 3 | + PCIE_L0SL_MAX_NO_LIMIT << 6 | + PCIE_L1L_MAX_NO_LIMIT << 9 | + PCICAP_EXP_DEVCAP_FUNC_RESET; + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_DEVCAP, 4, val); + + pci_virt_add_filter(pvd, start + PCICAP_EXP_DEVCTL, 2, + PCI_REG_FLAG_WRITE, + npu3_cfg_devctl, NULL); + + /* 0x08 - Device control and status */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DEVCTL, 4, 0x00002810, + 0xffff0000, 0x000f0000); + + /* 0x0c - Link capability */ + val = PCIE_LSPEED_VECBIT_2 | PCIE_LWIDTH_1X << 4; + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP, 4, val); + + /* 0x10 - Link control and status */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL, 4, 0x00130000, + 0xfffff000, 0xc0000000); + + /* 0x14 - Slot capability */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCAP, 4, 0x00000000); + + /* 0x18 - Slot control and status */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCTL, 4, 0x00000000); + + /* 0x1c - Root control and capability */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RC, 4, 0x00000000, + 0xffffffe0, 0x00000000); + + /* 0x20 - Root status */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RSTAT, 4, 0x00000000, + 0xffffffff, 0x00010000); + + /* 0x24 - Device capability 2 */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCIECAP_EXP_DCAP2, 4, 0x00000000); + + /* 0x28 - Device Control and status 2 */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DCTL2, 4, 0x00070000, + 0xffff0000, 0x00000000); + + /* 0x2c - Link capability 2 */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP2, 4, 0x00000007); + + /* 0x30 - Link control and status 2 */ + PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL2, 4, 0x00000003, + 0xffff0000, 0x00200000); + 
+ /* 0x34 - Slot capability 2 */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCAP2, 4, 0x00000000); + + /* 0x38 - Slot control and status 2 */ + PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCTL2, 4, 0x00000000); + + return start + PCICAP_EXP_SCTL2 + 8; +} + +static int64_t npu3_dev_procedure_write(struct npu3_dev *dev, uint32_t offset, + uint32_t data) +{ + switch (offset) { + case 0: + NPU3DEVINF(dev, "Ignoring write to status register\n"); + break; + case 4: + npu3_dev_procedure_init(dev, data); + break; + default: + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +static int64_t npu3_dev_procedure_read(struct npu3_dev *dev, uint32_t offset, + uint32_t *data) +{ + switch (offset) { + case 0: + *data = npu3_dev_procedure_status(dev); + break; + case 4: + *data = dev->proc.number; + break; + default: + *data = 0; + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +/* Hardware procedure control/status registers */ +static int64_t npu3_dev_procedure(void *pvd, struct pci_cfg_reg_filter *pcrf, + uint32_t offset, uint32_t size, + uint32_t *data, bool write) +{ + struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data; + + if (size != 4) + return OPAL_PARAMETER; + + offset -= pcrf->start; + + if (write) + return npu3_dev_procedure_write(dev, offset, *data); + + return npu3_dev_procedure_read(dev, offset, data); +} + +/* PPE SRAM access is indirect via CSAR/CSDR */ +static void npu3_dev_ppe_sram_sel(struct npu3_dev *dev, uint32_t reg) +{ + uint64_t val; + + val = SETFIELD(OB_PPE_CSAR_SRAM_ADDR, 0ull, reg); + xscom_write(dev->npu->chip_id, OB_PPE_CSAR(dev->ob_chiplet), val); +} + +static void npu3_dev_ppe_sram_write(struct npu3_dev *dev, uint32_t reg, + uint64_t val) +{ + npu3_dev_ppe_sram_sel(dev, reg); + xscom_write(dev->npu->chip_id, OB_PPE_CSDR(dev->ob_chiplet), val); +} + +static uint64_t npu3_dev_ppe_sram_read(struct npu3_dev *dev, uint32_t reg) +{ + uint64_t val; + + npu3_dev_ppe_sram_sel(dev, reg); + xscom_read(dev->npu->chip_id, 
OB_PPE_CSDR(dev->ob_chiplet), &val); + + return val; +} + +/* Software-implemented autonomous link training (SALT) */ +static int64_t npu3_dev_salt(void *pvd, struct pci_cfg_reg_filter *pcrf, + uint32_t offset, uint32_t size, uint32_t *data, + bool write) +{ + struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data; + unsigned long timeout; + uint32_t cmd_reg; + uint64_t val; + + if (size != 4 || offset != pcrf->start) + return OPAL_PARAMETER; + + /* The config register before this one holds CMD_REG */ + pci_virt_cfg_read_raw(pvd, PCI_VIRT_CFG_NORMAL, pcrf->start - 4, + 4, &cmd_reg); + + /* Check for another command in progress */ + val = npu3_dev_ppe_sram_read(dev, OB_PPE_SALT_CMD); + if (GETFIELD(OB_PPE_SALT_CMD_READY, val)) + return OPAL_BUSY; + + val = OB_PPE_SALT_CMD_READY; + val = SETFIELD(OB_PPE_SALT_CMD_RW, val, write); + val = SETFIELD(OB_PPE_SALT_CMD_LINKNUM, val, npu3_chip_dev_index(dev)); + val = SETFIELD(OB_PPE_SALT_CMD_REG, val, cmd_reg); + if (write) + val = SETFIELD(OB_PPE_SALT_CMD_DATA, val, *data); + + npu3_dev_ppe_sram_write(dev, OB_PPE_SALT_CMD, val); + + /* Wait for the go bit to clear */ + timeout = mftb() + msecs_to_tb(1000); + + while (GETFIELD(OB_PPE_SALT_CMD_READY, val)) { + if (tb_compare(mftb(), timeout) == TB_AAFTERB) { + NPU3DEVINF(dev, "SALT_CMD 0x%x: timeout\n", cmd_reg); + return OPAL_BUSY; + } + + val = npu3_dev_ppe_sram_read(dev, OB_PPE_SALT_CMD); + } + + if (GETFIELD(OB_PPE_SALT_CMD_ERR, val)) + NPU3DEVINF(dev, "SALT_CMD 0x%x: error\n", cmd_reg); + + if (!write) + *data = GETFIELD(OB_PPE_SALT_CMD_DATA, val); + + return OPAL_SUCCESS; +} + +#define VENDOR_CAP_LEN 0x1c +#define VENDOR_CAP_VERSION 0x02 + +static uint32_t npu3_cfg_populate_vendor_cap(struct npu3_dev *dev, + uint32_t start, uint32_t prev_cap) +{ + struct pci_virt_device *pvd = dev->nvlink.pvd; + + /* Capabilities list */ + PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start); + PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR); + + /* Length and version */ + 
PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN); + PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, VENDOR_CAP_VERSION); + + /* + * Defaults when the trap can't handle the read/write (eg. due to + * reading/writing less than 4 bytes). + */ + PCI_VIRT_CFG_INIT_RO(pvd, start + 4, 4, 0); + PCI_VIRT_CFG_INIT_RO(pvd, start + 8, 4, 0); + + /* PHY procedure trap */ + pci_virt_add_filter(pvd, start + 4, 8, + PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE, + npu3_dev_procedure, NULL); + + /* Link index */ + PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, npu3_chip_dev_index(dev)); + + /* SALT registers */ + PCI_VIRT_CFG_INIT_RO(pvd, start + 0x10, 4, 0); + PCI_VIRT_CFG_INIT_RO(pvd, start + 0x14, 4, 0); + + pci_virt_add_filter(pvd, start + 0x14, 4, + PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE, + npu3_dev_salt, NULL); + + return start + VENDOR_CAP_LEN; +} + +static void npu3_cfg_populate(struct npu3_dev *dev) +{ + struct pci_virt_device *pvd = dev->nvlink.pvd; + uint64_t addr; + uint32_t pos; + + /* 0x00 - Vendor/Device ID */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014); + + /* 0x04 - Command/Status */ + PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8, + 0xf9000000); + + pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE, + npu3_cfg_cmd, NULL); + + /* 0x08 - Rev/Class/Cache */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800102); + + /* 0x0c - CLS/Latency Timer/Header/BIST */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000); + + /* 0x10/14 - NTL BAR */ + addr = SETFIELD(0xf, dev->ntl_bar.addr, + PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64); + PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4, lo32(addr), 0xf, 0); + PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, hi32(addr), 0, 0); + + pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8, + PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE, + npu3_cfg_bar, &dev->ntl_bar); + + /* 0x18/1c - GENID BAR */ + addr = SETFIELD(0xf, dev->genid_bar.addr, + PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64); + PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 
4, lo32(addr), 0xf, 0); + PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR3, 4, hi32(addr), 0, 0); + + pci_virt_add_filter(pvd, PCI_CFG_BAR2, 8, + PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE, + npu3_cfg_bar, &dev->genid_bar); + + /* 0x20/0x24 - BARs, disabled */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000); + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000); + + /* 0x28 - Cardbus CIS pointer */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000); + + /* 0x2c - Subsystem ID */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000); + + /* 0x30 - ROM BAR, zero sized */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff); + + /* 0x34 - PCI Capability */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000); + + /* 0x38 - Reserved */ + PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000); + + /* 0x3c - INT line/pin/Minimal grant/Maximal latency */ + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); /* INT A */ + + /* PCIE and vendor specific capability */ + pos = npu3_cfg_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP); + pos = npu3_cfg_populate_vendor_cap(dev, pos, 0x41); + PCI_VIRT_CFG_INIT_RO(pvd, pos + 1, 1, 0); +} + +static void npu3_dev_create_pvd(struct npu3_dev *dev) +{ + struct npu3 *npu = dev->npu; + struct phb *phb = &npu->nvlink.phb; + + dev->nvlink.pvd = pci_virt_add_device(phb, dev->index, 0x100, dev); + if (!dev->nvlink.pvd) + return; + + phb->scan_map |= 0x1 << GETFIELD(0xf8, dev->nvlink.pvd->bdfn); + npu3_cfg_populate(dev); +} + +static void npu3_dt_add_mmio_window(struct npu3 *npu) +{ + struct dt_node *dn = npu->nvlink.phb.dt_node; + uint32_t ntl0_index = npu->index * NPU3_LINKS_PER_NPU; + uint64_t addr, size, win[2]; + + /* Device MMIO window (NTL/GENID regs only) */ + phys_map_get(npu->chip_id, NPU_NTL, ntl0_index, &win[0], NULL); + phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, &size); + win[1] = addr + size - win[0]; + + dt_add_property(dn, "ibm,mmio-window", win, sizeof(win)); + dt_add_property_cells(dn, 
"ranges", 0x02000000, + hi32(win[0]), lo32(win[0]), + hi32(win[0]), lo32(win[0]), + hi32(win[1]), lo32(win[1])); +} + +/* NDL No-Stall Event level */ +static uint32_t npu3_dev_interrupt_level(struct npu3_dev *dev) +{ + const uint32_t level[12] = { 1, 3, 5, 7, 9, 11, + 43, 45, 47, 49, 51, 53 }; + + return level[npu3_chip_dev_index(dev)]; +} + +static void npu3_dt_add_interrupts(struct npu3 *npu) +{ + struct dt_node *dn = npu->nvlink.phb.dt_node; + uint32_t *map, icsp, i = 0; + struct npu3_dev *dev; + size_t map_size = 0; + + npu3_for_each_nvlink_dev(dev, npu) + map_size += sizeof(*map) * 7; + + if (!map_size) + return; + + icsp = get_ics_phandle(); + map = zalloc(map_size); + assert(map); + + npu3_for_each_nvlink_dev(dev, npu) { + map[i] = dev->nvlink.pvd->bdfn << 8; + map[i + 3] = 1; /* INT A */ + map[i + 4] = icsp; /* interrupt-parent */ + map[i + 5] = npu->irq_base + npu3_dev_interrupt_level(dev); + map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL */ + i += 7; + } + + dt_add_property_cells(dn, "interrupt-parent", icsp); + dt_add_property(dn, "interrupt-map", map, map_size); + dt_add_property_cells(dn, "interrupt-map-mask", 0xff00, 0x0, 0x0, 0x7); + + free(map); +} + +/* Populate PCI root device node */ +static void npu3_dt_add_props(struct npu3 *npu) +{ + struct dt_node *dn = npu->nvlink.phb.dt_node; + + dt_add_property_cells(dn, "#address-cells", 3); + dt_add_property_cells(dn, "#size-cells", 2); + dt_add_property_cells(dn, "#interrupt-cells", 1); + dt_add_property_cells(dn, "bus-range", 0, 0xff); + dt_add_property_cells(dn, "clock-frequency", 0x200, 0); + + dt_add_property_strings(dn, "device_type", "pciex"); + /* To the OS, npu2 and npu3 are both ibm,ioda2-npu2-phb */ + dt_add_property_strings(dn, "compatible", + "ibm,power9-npu-pciex", + "ibm,ioda2-npu2-phb"); + + dt_add_property_cells(dn, "ibm,phb-index", + dt_prop_get_u32(npu->dt_node, "ibm,phb-index")); + dt_add_property_cells(dn, "ibm,phb-diag-data-size", 0); + dt_add_property_cells(dn, "ibm,opal-num-pes", 
NPU3_MAX_PE_NUM); + dt_add_property_cells(dn, "ibm,opal-reserved-pe", NPU3_RESERVED_PE_NUM); + dt_add_property_cells(dn, "ibm,supported-tce-sizes", + 12, /* 4K */ + 16, /* 64K */ + 24, /* 16M */ + 28); /* 256M */ + + dt_add_property_cells(dn, "ibm,chip-id", npu->chip_id); + dt_add_property_cells(dn, "ibm,npu-index", npu->index); + dt_add_property_cells(dn, "ibm,npcq", npu->dt_node->phandle); + dt_add_property_cells(dn, "ibm,xscom-base", npu->xscom_base); + dt_add_property_cells(dn, "ibm,links", NPU3_LINKS_PER_NPU); + + dt_add_property(dn, "reg", npu->regs, sizeof(npu->regs)); + dt_add_property_u64s(dn, "ibm,mmio-atsd", + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(0), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(1), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(2), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(3), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(4), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(5), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(6), + npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(7)); + + npu3_dt_add_mmio_window(npu); + npu3_dt_add_interrupts(npu); +} + +void npu3_init_nvlink(struct npu3 *npu) +{ + struct npu3_dev *dev; + + if (!npu3_next_dev(npu, NULL, NPU3_DEV_TYPE_NVLINK)) + return; + + npu3_init_hw(npu); + npu3_create_phb(npu); + + npu3_for_each_nvlink_dev(dev, npu) + npu3_dev_create_pvd(dev); + + npu3_dt_add_props(npu); + + /* TODO: Sort out if/why we still can't enable this */ + disable_fast_reboot("NVLink device enabled"); +} + +static int64_t npu3_init_context_pid(struct npu3 *npu, uint32_t index, + uint64_t msr) +{ + uint64_t map, old_map; + + /* Unfiltered XTS mode; index is lparshort */ + map = SETFIELD(NPU3_XTS_PID_MAP_LPARSHORT, 0ull, index); + + /* Enable this mapping for both real and virtual addresses */ + map |= NPU3_XTS_PID_MAP_VALID_ATRGPA0 | NPU3_XTS_PID_MAP_VALID_ATRGPA1; + + /* Enable TLBIE/MMIOSD forwarding for this entry */ + map |= NPU3_XTS_PID_MAP_VALID_ATSD; + + /* Set the relevant MSR bits */ + if (msr & MSR_DR) + map |= NPU3_XTS_PID_MAP_MSR_DR; + + if (msr & MSR_HV) + map 
|= NPU3_XTS_PID_MAP_MSR_HV; + + if (msr & MSR_PR) + map |= NPU3_XTS_PID_MAP_MSR_PR; + + /* We don't support anything other than 64-bit so hardcode it here */ + map |= NPU3_XTS_PID_MAP_MSR_SF; + + old_map = npu3_read(npu, NPU3_XTS_PID_MAP(index)); + + /* Error out if this entry is already set with different msr bits */ + if (old_map && GETFIELD(NPU3_XTS_PID_MAP_MSR, old_map) != + GETFIELD(NPU3_XTS_PID_MAP_MSR, map)) { + NPU3ERR(npu, "%s: Unexpected MSR value\n", __func__); + return OPAL_PARAMETER; + } + + if (!old_map) { + NPU3DBG(npu, "XTS_PID_MAP[%03d] = 0x%08llx\n", index, map); + npu3_write(npu, NPU3_XTS_PID_MAP(index), map); + } + + npu->nvlink.ctx_ref[index]++; + + return OPAL_SUCCESS; +} + +#define NPU3_VALID_ATS_MSR_BITS (MSR_DR | MSR_HV | MSR_PR | MSR_SF) + +/* + * Allocate a context ID and initialize the tables with the relevant + * information. Returns the ID or error if one couldn't be allocated. + */ +int64_t npu3_init_context(struct phb *phb, uint64_t msr, uint64_t bdf) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + uint32_t lparshort, i; + uint64_t map; + int64_t rc; + + /* + * MSR bits should be masked by the caller to allow for future + * expansion if required. 
+	 */
+	if (msr & ~NPU3_VALID_ATS_MSR_BITS)
+		return OPAL_UNSUPPORTED;
+
+	lock(&npu->lock);
+
+	/* Look for the XTS_BDF_MAP entry that carries this bdf */
+	for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
+		map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
+
+		if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
+			break;
+	}
+
+	if (i == NPU3_XTS_BDF_MAP_MAX) {
+		NPU3ERR(npu, "LPARID not associated with any GPU\n");
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	lparshort = GETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map);
+	NPU3DBG(npu, "Found LPARSHORT 0x%x for bdf %02llx:%02llx.%llx\n",
+		lparshort, bdf >> 8 & 0xff, bdf >> 3 & 0x1f, bdf & 0x7);
+
+	rc = npu3_init_context_pid(npu, lparshort, msr);
+	if (rc)
+		goto out;
+
+	/* Only mark the BDF entry valid once its PID map entry is in place */
+	if (!(map & NPU3_XTS_BDF_MAP_VALID)) {
+		map |= NPU3_XTS_BDF_MAP_VALID;
+		npu3_write(npu, NPU3_XTS_BDF_MAP(i), map);
+	}
+
+	/* On success the LPARSHORT doubles as the context ID */
+	rc = lparshort;
+
+out:
+	unlock(&npu->lock);
+	return rc;
+}
+
+/*
+ * Drop one reference on XTS_PID_MAP[index].
+ *
+ * Returns OPAL_PARAMETER if no reference is held, OPAL_PARTIAL if other
+ * references remain, or OPAL_SUCCESS once the last reference is gone and
+ * the register has been cleared.
+ */
+static int64_t npu3_destroy_context_pid(struct npu3 *npu, uint32_t index)
+{
+	if (!npu->nvlink.ctx_ref[index])
+		return OPAL_PARAMETER;
+
+	/* Only destroy when refcount hits 0 */
+	if (--npu->nvlink.ctx_ref[index])
+		return OPAL_PARTIAL;
+
+	NPU3DBG(npu, "XTS_PID_MAP[%03d] = 0 (destroy)\n", index);
+	npu3_write(npu, NPU3_XTS_PID_MAP(index), 0ull);
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Release the context associated with bdf.
+ *
+ * Finds the XTS_BDF_MAP entry matching bdf and drops a reference on the
+ * XTS_PID_MAP entry selected by its LPARSHORT field. Returns
+ * OPAL_PARAMETER when no entry matches, otherwise whatever
+ * npu3_destroy_context_pid() returns.
+ */
+int64_t npu3_destroy_context(struct phb *phb, uint64_t bdf)
+{
+	struct npu3 *npu = npu3_phb_to_npu(phb);
+	uint32_t lparshort, i;
+	/*
+	 * Raw register contents: a bit pattern, not a signed quantity, so
+	 * keep it in the same unsigned type npu3_read() returns.
+	 */
+	uint64_t map;
+	int64_t rc;
+
+	lock(&npu->lock);
+
+	for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
+		map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
+
+		if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
+			break;
+	}
+
+	if (i == NPU3_XTS_BDF_MAP_MAX) {
+		NPU3ERR(npu, "LPARID not associated with any GPU\n");
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	lparshort = GETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map);
+	rc = npu3_destroy_context_pid(npu, lparshort);
+
+out:
+	unlock(&npu->lock);
+	return rc;
+}
+
+/* Map the given virtual bdf to lparid with given lpcr */
+int64_t npu3_map_lpar(struct phb *phb, uint64_t bdf, uint64_t lparid,
+		      uint64_t
lpcr)
+{
+	struct npu3 *npu = npu3_phb_to_npu(phb);
+	struct npu3_dev *dev;
+	int64_t rc = OPAL_SUCCESS;
+	uint64_t map, val;
+	uint32_t i;
+
+	/*
+	 * The LPCR bits are only required for hash based ATS, which we don't
+	 * currently support, but may need to in the future.
+	 */
+	if (lpcr)
+		return OPAL_UNSUPPORTED;
+
+	lock(&npu->lock);
+
+	/* Update the entry if it already exists */
+	for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
+		map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
+
+		if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
+			break;
+	}
+
+	if (i == NPU3_XTS_BDF_MAP_MAX) {
+		/* No existing mapping found, find space for a new one */
+		for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++)
+			if (!npu3_read(npu, NPU3_XTS_BDF_MAP(i)))
+				break;
+	}
+
+	if (i == NPU3_XTS_BDF_MAP_MAX) {
+		NPU3ERR(npu, "No free XTS_BDF[] entry\n");
+		rc = OPAL_RESOURCE;
+		goto out;
+	}
+
+	/*
+	 * Build the entry from scratch; the slot number is reused as the
+	 * LPARSHORT. The VALID bit is deliberately not set here.
+	 */
+	map = NPU3_XTS_BDF_MAP_UNFILT;
+	map = SETFIELD(NPU3_XTS_BDF_MAP_BDF, map, bdf);
+	map = SETFIELD(NPU3_XTS_BDF_MAP_LPARID, map, lparid);
+	map = SETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map, i);
+
+	/* We only support radix at the moment */
+	map = SETFIELD(NPU3_XTS_BDF_MAP_XLAT, map, 0x3);
+
+	/*
+	 * Find a link on which to send ATSDs for this device.
+	 *
+	 * NOTE(review): the gpu pointer is checked before use on the
+	 * assumption that a link may not have a GPU attached -- confirm
+	 * against the code that assigns dev->nvlink.gpu.
+	 */
+	npu3_for_each_nvlink_dev(dev, npu)
+		if (dev->nvlink.gpu && dev->nvlink.gpu->bdfn == bdf)
+			break;
+
+	if (!dev || !dev->nvlink.gpu || dev->nvlink.gpu->bdfn != bdf) {
+		NPU3ERR(npu, "Can't find a link for bdf %02llx:%02llx.%llx\n",
+			bdf >> 8 & 0xff, bdf >> 3 & 0x1f, bdf & 0x7);
+		rc = OPAL_PARAMETER;
+		goto out;
+	}
+
+	map = SETFIELD(NPU3_XTS_BDF_MAP_BRICK, map, dev->index);
+
+	NPU3DBG(npu, "XTS_BDF_MAP[%03d] = 0x%08llx\n", i, map);
+	npu3_write(npu, NPU3_XTS_BDF_MAP(i), map);
+
+	/* We need to allocate an ATSD per link */
+	val = SETFIELD(NPU3_XTS_ATSD_HYP_LPARID, 0ull, lparid);
+	if (!lparid)
+		val |= NPU3_XTS_ATSD_HYP_MSR_HV;
+
+	npu3_write(npu, NPU3_XTS_ATSD_HYP(dev->index), val);
+
+out:
+	unlock(&npu->lock);
+	return rc;
+}
+
+static int64_t npu3_relaxed_order_enable(struct npu3 *npu, uint64_t
src) +{ + struct npu3_dev *dev; + uint32_t i; + + for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++) + if (npu3_read(npu, NPU3_RELAXED_SRC(i)) == src) + return OPAL_SUCCESS; /* Already enabled */ + + /* Find somewhere to write this source */ + for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++) + if (!npu3_read(npu, NPU3_RELAXED_SRC(i))) + break; + + if (i == NPU3_RELAXED_SRC_MAX) { + NPU3ERR(npu, "Insufficient resources to activate relaxed ordering mode\n"); + return OPAL_RESOURCE; + } + + npu3_write(npu, NPU3_RELAXED_SRC(i), src); + + npu3_for_each_nvlink_dev(dev, npu) { + uint64_t val = npu3_read(npu, NPU3_RELAXED_CFG2(dev->index)); + + val |= NPU3_RELAXED_CFG2_SRC_WRENA(i) | + NPU3_RELAXED_CFG2_SRC_RDENA(i); + npu3_write(npu, NPU3_RELAXED_CFG2(dev->index), val); + } + + return OPAL_SUCCESS; +} + +static void npu3_relaxed_order_disable(struct npu3 *npu, uint64_t src) +{ + struct npu3_dev *dev; + uint32_t i; + + for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++) + if (npu3_read(npu, NPU3_RELAXED_SRC(i)) == src) + break; + + if (i == NPU3_RELAXED_SRC_MAX) + return; /* Already disabled */ + + npu3_for_each_nvlink_dev(dev, npu) { + uint64_t val = npu3_read(npu, NPU3_RELAXED_CFG2(dev->index)); + + val &= ~NPU3_RELAXED_CFG2_SRC_WRENA(i); + val &= ~NPU3_RELAXED_CFG2_SRC_RDENA(i); + npu3_write(npu, NPU3_RELAXED_CFG2(dev->index), val); + } + + npu3_write(npu, NPU3_RELAXED_SRC(i), 0ull); +} + +/* Enable or disable relaxed ordering on all nvlinks for a given PEC. */ +int64_t npu3_set_relaxed_order(struct phb *phb, uint32_t gcid, int pec, + bool enable) +{ + struct npu3 *npu = npu3_phb_to_npu(phb); + int64_t rc = OPAL_SUCCESS; + uint64_t src; + + NPU3INF(npu, "%s relaxed ordering for PEC %d on chip %d\n", + enable ? 
"Enabling" : "Disabling", + pec, gcid); + + lock(&npu->lock); + + src = SETFIELD(NPU3_RELAXED_SRC_GRPCHP, 0ull, gcid); + src = SETFIELD(NPU3_RELAXED_SRC_PEC, src, pec); + src = SETFIELD(NPU3_RELAXED_SRC_RDSTART, src, 0); + src = SETFIELD(NPU3_RELAXED_SRC_RDEND, src, 47); + src = SETFIELD(NPU3_RELAXED_SRC_WRSTART, src, 0); + src = SETFIELD(NPU3_RELAXED_SRC_WREND, src, 23); + + if (enable) + rc = npu3_relaxed_order_enable(npu, src); + else + npu3_relaxed_order_disable(npu, src); + + unlock(&npu->lock); + return rc; +} diff --git a/hw/npu3.c b/hw/npu3.c new file mode 100644 index 0000000..22ccef2 --- /dev/null +++ b/hw/npu3.c @@ -0,0 +1,554 @@ +/* Copyright 2019 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <io.h> +#include <xscom.h> +#include <npu3.h> +#include <npu3-regs.h> +#include <nvram.h> +#include <interrupts.h> +#include <xive.h> + +#define NPU3LOG(l, npu, fmt, a...) \ + prlog(l, "NPU[%d:%d]: " fmt, (npu)->chip_id, (npu)->index, ##a) +#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a) +#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a) +#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a) + +#define NPU3DEVLOG(l, dev, fmt, a...) \ + prlog(l, "NPU[%d:%d:%d]: " fmt, \ + (dev)->npu->chip_id, \ + (dev)->npu->index, \ + (dev)->index, ##a) +#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a) +#define NPU3DEVINF(dev, fmt, a...) 
NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
+#define NPU3DEVERR(dev, fmt, a...)	NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
+
+/*
+ * Create the "link" child node for link dev_index of NPU npu_index and
+ * record which PHY ("ibm,npu-phy") and lanes ("ibm,npu-lane-mask") it uses.
+ *
+ * The PHY assignment is resolved before the node is created, so an
+ * unrecognised npu_index or dev_index adds nothing to the tree instead of
+ * leaving a partial node or publishing uninitialised values.
+ */
+static void npu3_dt_create_link(struct dt_node *npu, uint32_t npu_index,
+				uint32_t dev_index)
+{
+	struct dt_node *link;
+	uint32_t phy_lane_mask, ob_chiplet;
+
+	switch (npu_index) {
+	case 0:
+		/* fall through */
+	case 2:
+		/* All links of NPU 0 use chiplet 0, all links of NPU 2 use 3 */
+		ob_chiplet = npu_index ? 3 : 0;
+
+		switch (dev_index) {
+		case 0:
+			phy_lane_mask = PPC_BITMASK32(0, 3);
+			break;
+		case 1:
+			phy_lane_mask = PPC_BITMASK32(13, 16);
+			break;
+		case 2:
+			phy_lane_mask = PPC_BITMASK32(7, 10);
+			break;
+		case 3:
+			phy_lane_mask = PPC_BITMASK32(20, 23);
+			break;
+		default:
+			return;
+		}
+
+		break;
+	case 1:
+		/* NPU 1 alternates its links between chiplets 1 and 2 */
+		switch (dev_index) {
+		case 0:
+			ob_chiplet = 1;
+			phy_lane_mask = PPC_BITMASK32(0, 3);
+			break;
+		case 1:
+			ob_chiplet = 2;
+			phy_lane_mask = PPC_BITMASK32(0, 3);
+			break;
+		case 2:
+			ob_chiplet = 1;
+			phy_lane_mask = PPC_BITMASK32(7, 10);
+			break;
+		case 3:
+			ob_chiplet = 2;
+			phy_lane_mask = PPC_BITMASK32(7, 10);
+			break;
+		default:
+			return;
+		}
+
+		break;
+	default:
+		return;
+	}
+
+	link = dt_new_addr(npu, "link", dev_index);
+
+	dt_add_property_string(link, "compatible", "ibm,npu-link");
+	dt_add_property_cells(link, "reg", dev_index);
+	dt_add_property_cells(link, "ibm,npu-link-index", dev_index);
+	dt_add_property_cells(link, "ibm,npu-phy", ob_chiplet);
+	dt_add_property_cells(link, "ibm,npu-lane-mask", phy_lane_mask);
+}
+
+/*
+ * Create the "npu" node for NPU npu_index under the given xscom node,
+ * followed by one "link" child per link. npu_index selects an entry of
+ * npu_base[] and is not range-checked here.
+ */
+static void npu3_dt_create_npu(struct dt_node *xscom, uint32_t npu_index)
+{
+	const uint32_t npu_base[] = { 0x5011000, 0x5011400, 0x3011c00 };
+	struct dt_node *npu;
+
+	npu = dt_new_addr(xscom, "npu", npu_base[npu_index]);
+
+	dt_add_property_cells(npu, "#size-cells", 0);
+	dt_add_property_cells(npu, "#address-cells", 1);
+	dt_add_property_cells(npu, "reg", npu_base[npu_index], 0x2c);
+	dt_add_property_string(npu, "compatible", "ibm,power9-npu3");
+	dt_add_property_cells(npu, "ibm,npu-index", npu_index);
+	dt_add_property_cells(npu, "ibm,phb-index", 7 + npu_index);
+
+	for (uint32_t i = 0; i <
NPU3_LINKS_PER_NPU; i++)
+		npu3_dt_create_link(npu, npu_index, i);
+}
+
+/*
+ * Add npu3 nodes under every "ibm,xscom" node.
+ *
+ * Returns false, adding nothing, when the processor is older than P9,
+ * when no chip is found, or when the first chip is a Nimbus or Cumulus
+ * P9; returns true otherwise.
+ *
+ * This can be removed when/if we decide to use HDAT instead
+ */
+static bool npu3_dt_create(void)
+{
+	struct proc_chip *chip = next_chip(NULL);
+	struct dt_node *xscom;
+
+	/* npu3 chips only; also bail out rather than dereference a NULL chip */
+	if (proc_gen < proc_gen_p9 ||
+	    !chip ||
+	    chip->type == PROC_CHIP_P9_NIMBUS ||
+	    chip->type == PROC_CHIP_P9_CUMULUS)
+		return false;
+
+	/* Three NPUs per xscom node */
+	dt_for_each_compatible(dt_root, xscom, "ibm,xscom")
+		for (uint32_t i = 0; i < 3; i++)
+			npu3_dt_create_npu(xscom, i);
+
+	return true;
+}
+
+/*
+ * Allocate a struct npu3 for device-tree node dn and fill it in from the
+ * node's properties, including one npu3_dev slot per "ibm,npu-link" child.
+ * Asserts on allocation failure, on an unknown chip id and on a link index
+ * that does not fit devices[].
+ */
+static struct npu3 *npu3_create(struct dt_node *dn)
+{
+	struct npu3 *npu;
+	struct dt_node *link;
+	struct npu3_dev *dev;
+	char *path;
+	uint32_t i;
+
+	npu = zalloc(sizeof(*npu));
+	assert(npu);
+
+	init_lock(&npu->lock);
+
+	npu->dt_node = dn;
+	npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
+	npu->xscom_base = dt_get_address(dn, 0, NULL);
+
+	npu->chip_id = dt_get_chip_id(dn);
+	assert(get_chip(npu->chip_id));
+
+	dt_for_each_compatible(dn, link, "ibm,npu-link") {
+		i = dt_prop_get_u32(link, "ibm,npu-link-index");
+		assert(i < NPU3_LINKS_PER_NPU);
+
+		dev = &npu->devices[i];
+		dev->index = i;
+		dev->npu = npu;
+		dev->dn = link;
+		dev->ob_chiplet = dt_prop_get_u32(link, "ibm,npu-phy");
+		dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
+		dev->proc.status = NPU3_PROC_COMPLETE;
+	}
+
+	path = dt_get_path(dn);
+	NPU3INF(npu, "Found %s\n", path);
+	NPU3INF(npu, "SCOM base: 0x%llx\n", npu->xscom_base);
+	free(path);
+
+	return npu;
+}
+
+/*
+ * Iterate over npu->devices[]: return the first device after dev (or the
+ * first device at all when dev is NULL) whose type equals the requested
+ * one. NPU3_DEV_TYPE_ANY matches every slot. Returns NULL when the array
+ * is exhausted.
+ */
+struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev,
+			       enum npu3_dev_type type)
+{
+	uint32_t i = 0;
+
+	if (dev)
+		i = dev->index + 1;
+
+	for (; i < NPU3_LINKS_PER_NPU; i++) {
+		dev = &npu->devices[i];
+
+		if (dev->type == type || type == NPU3_DEV_TYPE_ANY)
+			return dev;
+	}
+
+	return NULL;
+}
+
+static void npu3_device_detect_fixup(struct npu3_dev *dev)
+{
+	struct dt_node *dn = dev->dn;
+
+	if (dev->type == NPU3_DEV_TYPE_NVLINK) {
+		dt_add_property_strings(dn,
"ibm,npu-link-type", "nvlink"); + dev->link_speed = dt_prop_get_u32_def( + dn, "nvidia,link-speed", 0xff); + return; + } + + NPU3DEVDBG(dev, "Link type unknown\n"); + dt_add_property_strings(dn, "ibm,npu-link-type", "unknown"); +} + +/* + * We use the indirect method because it uses the same addresses as + * the MMIO offsets (NPU RING) + */ +static void npu3_scom_sel(struct npu3 *npu, uint64_t reg, uint64_t size) +{ + uint64_t val; + + val = SETFIELD(NPU3_MISC_DA_ADDR, 0ull, reg); + val = SETFIELD(NPU3_MISC_DA_LEN, val, size); + xscom_write(npu->chip_id, + npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_ADDR, + val); +} + +static void npu3_scom_write(struct npu3 *npu, uint64_t reg, uint64_t size, + uint64_t val) +{ + npu3_scom_sel(npu, reg, size); + xscom_write(npu->chip_id, + npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA, + val); +} + +static uint64_t npu3_scom_read(struct npu3 *npu, uint64_t reg, uint64_t size) +{ + uint64_t val; + + npu3_scom_sel(npu, reg, size); + xscom_read(npu->chip_id, + npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA, + &val); + + return val; +} + +void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val) +{ + void *mmio = (void *)npu->regs[0]; + + if (mmio) + out_be64(mmio + reg, val); + else + npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_8B, val); + + /* CQ_SM writes should be mirrored in all four blocks */ + if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0)) + return; + + for (uint32_t i = 1; i < 4; i++) + npu3_write(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg), + val); +} + +uint64_t npu3_read(struct npu3 *npu, uint64_t reg) +{ + void *mmio = (void *)npu->regs[0]; + + if (mmio) + return in_be64(mmio + reg); + + return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_8B); +} + +void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val) +{ + void *mmio = (void *)npu->regs[0]; + + if (mmio) + out_be32(mmio + reg, val); + else + npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_4B, + (uint64_t)val << 32); + + if (NPU3_REG_BLOCK(reg) != 
NPU3_BLOCK_CQ_SM(0)) + return; + + for (uint32_t i = 1; i < 4; i++) + npu3_write_4b(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg), + val); +} + +uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg) +{ + void *mmio = (void *)npu->regs[0]; + + if (mmio) + return in_be32(mmio + reg); + + return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_4B) >> 32; +} + +static void npu3_misc_config(struct npu3 *npu) +{ + struct npu3_dev *dev; + uint32_t typemap = 0; + uint64_t reg, val; + + npu3_for_each_nvlink_dev(dev, npu) + typemap |= 0x10 >> dev->index; + + reg = NPU3_SM_MISC_CFG0; + val = npu3_read(npu, reg); + val |= NPU3_SM_MISC_CFG0_ENABLE_PBUS; + val &= ~NPU3_SM_MISC_CFG0_ENABLE_SNARF_CPM; + val = SETFIELD(NPU3_SM_MISC_CFG0_NVLINK_MODE, val, typemap); + val = SETFIELD(NPU3_SM_MISC_CFG0_OCAPI_MODE, val, ~typemap); + npu3_write(npu, reg, val); + + reg = NPU3_CTL_MISC_CFG2; + val = npu3_read(npu, reg); + val = SETFIELD(NPU3_CTL_MISC_CFG2_NVLINK_MODE, val, typemap); + val = SETFIELD(NPU3_CTL_MISC_CFG2_OCAPI_MODE, val, ~typemap); + npu3_write(npu, reg, val); + + reg = NPU3_DAT_MISC_CFG1; + val = npu3_read(npu, reg); + val = SETFIELD(NPU3_DAT_MISC_CFG1_NVLINK_MODE, val, typemap); + val = SETFIELD(NPU3_DAT_MISC_CFG1_OCAPI_MODE, val, ~typemap); + npu3_write(npu, reg, val); +} + +static void npu3_assign_bars(struct npu3 *npu) +{ + struct npu3_dev *dev; + uint64_t addr, size, val; + + /* Global MMIO bar (per npu) */ + phys_map_get(npu->chip_id, NPU_REGS, npu->index, &addr, &size); + val = SETFIELD(NPU3_MMIO_BAR_ADDR, 0ull, addr >> 24); + val |= NPU3_MMIO_BAR_ENABLE; + npu3_write(npu, NPU3_MMIO_BAR, val); + + NPU3INF(npu, "MMIO base: 0x%016llx (%lldMB)\n", addr, size >> 20); + npu->regs[0] = addr; + npu->regs[1] = size; + + /* NTL bar (per device) */ + npu3_for_each_dev(dev, npu) { + phys_map_get(npu->chip_id, NPU_NTL, npu3_chip_dev_index(dev), + &addr, &size); + val = SETFIELD(NPU3_NTL_BAR_ADDR, 0ull, addr >> 16); + val = SETFIELD(NPU3_NTL_BAR_SIZE, val, ilog2(size >> 16)); + 
npu3_write(npu, NPU3_NTL_BAR(dev->index), val); + + dev->ntl_bar.addr = addr; + dev->ntl_bar.size = size; + } + + /* GENID bar (logically divided per device) */ + phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, NULL); + val = SETFIELD(NPU3_GENID_BAR_ADDR, 0ull, addr >> 19); + npu3_write(npu, NPU3_GENID_BAR, val); + + npu3_for_each_dev(dev, npu) { + dev->genid_bar.addr = addr + (dev->index << 16); + dev->genid_bar.size = 64 << 10; + } +} + +void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable) +{ + struct npu3 *npu = dev->npu; + uint64_t reg, val; + + if (dev->ntl_bar.enable == enable) /* No state change */ + return; + + dev->ntl_bar.enable = enable; + dev->genid_bar.enable = enable; + + reg = NPU3_NTL_BAR(dev->index); + val = npu3_read(npu, reg); + val = SETFIELD(NPU3_NTL_BAR_ENABLE, val, enable); + npu3_write(npu, reg, val); + + /* + * Generation IDs are a single space in the hardware but we split them + * per device. Only disable in hardware if every device has disabled. 
+ */ + if (!enable) + npu3_for_each_dev(dev, npu) + if (dev->genid_bar.enable) + return; + + reg = NPU3_GENID_BAR; + val = npu3_read(npu, reg); + val = SETFIELD(NPU3_GENID_BAR_ENABLE, val, enable); + npu3_write(npu, reg, val); +} + +static uint64_t npu3_ipi_attributes(struct irq_source *is, uint32_t isn) +{ + struct npu3 *npu = is->data; + uint32_t level = isn - npu->irq_base; + + /* TCE interrupt is used to detect a frozen PE */ + if (level == 18) + return IRQ_ATTR_TARGET_OPAL | + IRQ_ATTR_TARGET_RARE | + IRQ_ATTR_TYPE_MSI; + + return IRQ_ATTR_TARGET_LINUX; +} + +static void npu3_ipi_interrupt(struct irq_source *is, uint32_t isn) +{ + struct npu3 *npu = is->data; + uint32_t level = isn - npu->irq_base; + + if (level != 18) { + NPU3ERR(npu, "Received unknown interrupt %d\n", level); + return; + } + + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR); +} + +#define NPU3_IRQ_LEVELS 60 + +static char *npu3_ipi_name(struct irq_source *is, uint32_t isn) +{ + struct npu3 *npu = is->data; + uint32_t level = isn - npu->irq_base; + static const char *names[NPU3_IRQ_LEVELS] = { + [0] = "NDL 0 Stall Event (brick 0)", + [1] = "NDL 0 No-Stall Event (brick 0)", + [2] = "NDL 1 Stall Event (brick 1)", + [3] = "NDL 1 No-Stall Event (brick 1)", + [4] = "NDL 2 Stall Event (brick 2)", + [5] = "NDL 2 No-Stall Event (brick 2)", + [6] = "NDL 3 Stall Event (brick 3)", + [7] = "NDL 3 No-Stall Event (brick 3)", + [8] = "NDL 4 Stall Event (brick 4)", + [9] = "NDL 4 No-Stall Event (brick 4)", + [10] = "NDL 5 Stall Event (brick 5)", + [11] = "NDL 5 No-Stall Event (brick 5)", + [12] = "NTL 0 Event", + [13] = "NTL 1 Event", + [14] = "NTL 2 Event", + [15] = "NTL 3 Event", + [16] = "NTL 4 Event", + [17] = "NTL 5 Event", + [18] = "TCE Event", + [19] = "ATS Event", + [20] = "CQ Event", + [21] = "MISC Event", + [41] = "Memory Controller Event", + [42] = "NDL 6 Stall Event (brick 6)", + [43] = "NDL 6 No-Stall Event (brick 6)", + [44] = "NDL 7 Stall Event (brick 7)", + [45] = "NDL 7 
No-Stall Event (brick 7)", + [46] = "NDL 8 Stall Event (brick 8)", + [47] = "NDL 8 No-Stall Event (brick 8)", + [48] = "NDL 9 Stall Event (brick 9)", + [49] = "NDL 9 No-Stall Event (brick 9)", + [50] = "NDL 10 Stall Event (brick 10)", + [51] = "NDL 10 No-Stall Event (brick 10)", + [52] = "NDL 11 Stall Event (brick 11)", + [53] = "NDL 11 No-Stall Event (brick 11)", + [54] = "NTL 6 Event", + [55] = "NTL 7 Event", + [56] = "NTL 8 Event", + [57] = "NTL 9 Event", + [58] = "NTL 10 Event", + [59] = "NTL 11 Event", + }; + + if (level >= NPU3_IRQ_LEVELS || !names[level]) + return strdup("Unknown"); + + return strdup(names[level]); +} + +static const struct irq_source_ops npu3_ipi_ops = { + .attributes = npu3_ipi_attributes, + .interrupt = npu3_ipi_interrupt, + .name = npu3_ipi_name, +}; + +static void npu3_setup_irqs(struct npu3 *npu) +{ + uint64_t reg, val; + uint32_t base; + + base = xive_alloc_ipi_irqs(npu->chip_id, NPU3_IRQ_LEVELS, 64); + if (base == XIVE_IRQ_ERROR) { + NPU3ERR(npu, "Failed to allocate interrupt sources\n"); + return; + } + + xive_register_ipi_source(base, NPU3_IRQ_LEVELS, npu, &npu3_ipi_ops); + + /* Set IPI configuration */ + reg = NPU3_MISC_CFG; + val = npu3_read(npu, reg); + val = SETFIELD(NPU3_MISC_CFG_IPI_PS, val, NPU3_MISC_CFG_IPI_PS_64K); + val = SETFIELD(NPU3_MISC_CFG_IPI_OS, val, NPU3_MISC_CFG_IPI_OS_AIX); + npu3_write(npu, reg, val); + + /* Set IRQ base */ + reg = NPU3_MISC_INT_BAR; + val = SETFIELD(NPU3_MISC_INT_BAR_ADDR, 0ull, + (uint64_t)xive_get_trigger_port(base) >> 12); + npu3_write(npu, reg, val); + + npu->irq_base = base; +} + +static void npu3_init(struct npu3 *npu) +{ + struct npu3_dev *dev; + + platform.npu3_device_detect(npu); + npu3_for_each_dev(dev, npu) + npu3_device_detect_fixup(dev); + + npu3_misc_config(npu); + npu3_assign_bars(npu); + npu3_setup_irqs(npu); + npu3_init_nvlink(npu); +} + +void probe_npu3(void) +{ + struct dt_node *dn; + struct npu3 *npu; + + if (!npu3_dt_create()) + return; + + if 
(!platform.npu3_device_detect) { + prlog(PR_INFO, "NPU: Platform does not support NPU\n"); + return; + } + + dt_for_each_compatible(dt_root, dn, "ibm,power9-npu3") { + npu = npu3_create(dn); + npu3_init(npu); + } +} diff --git a/include/npu3-regs.h b/include/npu3-regs.h new file mode 100644 index 0000000..2e1dc39 --- /dev/null +++ b/include/npu3-regs.h @@ -0,0 +1,248 @@ +/* Copyright 2019 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NPU3_REGS_H +#define __NPU3_REGS_H + +#define NPU3_FIR(n) (0x2c00 + (n) * 0x40) +#define NPU3_FIR_MASK(n) (0x2c03 + (n) * 0x40) +#define NPU3_FIR_ACTION0(n) (0x2c06 + (n) * 0x40) +#define NPU3_FIR_ACTION1(n) (0x2c07 + (n) * 0x40) +#define NPU3_FIR_MAX 3 + +/* NPU RING: Indirect address/data port */ +#define NPU3_MISC_SCOM_IND_SCOM_ADDR 0x33e +#define NPU3_MISC_DA_ADDR PPC_BITMASK(0, 23) +#define NPU3_MISC_DA_LEN PPC_BITMASK(24, 25) +#define NPU3_MISC_DA_LEN_4B 2 +#define NPU3_MISC_DA_LEN_8B 3 +#define NPU3_MISC_SCOM_IND_SCOM_DATA 0x33f + +/* NPU RING: Indirect register blocks */ +#define NPU3_BLOCK(nib0, nib1) ((nib0) << 20 | (nib1) << 16) +#define NPU3_REG_BLOCK(reg) ((reg) & 0xff0000) +#define NPU3_REG_OFFSET(reg) ((reg) & 0xffff) + +#define NPU3_BLOCK_NDL_U(brk) NPU3_BLOCK(0 + (brk) / 2,\ + 8 + (brk) % 2 * 2) +#define NPU3_BLOCK_NTL_U(brk) NPU3_BLOCK(0 + (brk) / 2,\ + 9 + (brk) % 2 * 2) +#define NPU3_BLOCK_CQ_SM(n) NPU3_BLOCK(4, (n)) +#define NPU3_BLOCK_CQ_CTL NPU3_BLOCK(4, 4) +#define 
NPU3_BLOCK_CQ_DAT NPU3_BLOCK(4, 5) +#define NPU3_BLOCK_NDL(brk) NPU3_BLOCK(4 + (brk) / 2,\ + 8 + (brk) % 2 * 2) +#define NPU3_BLOCK_NTL(brk) NPU3_BLOCK(4 + (brk) / 2,\ + 9 + (brk) % 2 * 2) +#define NPU3_BLOCK_NPU_ATS NPU3_BLOCK(7, 0) +#define NPU3_BLOCK_NPU_XTS NPU3_BLOCK(7, 1) +#define NPU3_BLOCK_NPU_MISC NPU3_BLOCK(7, 2) +#define NPU3_BLOCK_NPU_XTS_ATSD(n) NPU3_BLOCK(8, (n)) + +/* NDL_U block registers */ +#define NPU3_DLPL_CTL(brk) (NPU3_BLOCK_NDL_U(brk) + 0xfff4) +#define NPU3_DLPL_CTL_RESET_RX PPC_BIT32(0) +#define NPU3_DLPL_CTL_RESET_MISC PPC_BIT32(1) +#define NPU3_DLPL_CFG(brk) (NPU3_BLOCK_NDL_U(brk) + 0xfff8) +#define NPU3_DLPL_CFG_PRI_BYTESWAP PPC_BIT32(0) + +/* NTL_U block registers */ +#define NPU3_NTL_MISC_CFG1(brk) (NPU3_BLOCK_NTL_U(brk) + 0x0c0) +#define NPU3_NTL_MISC_CFG1_NTL_RESET PPC_BITMASK(8, 9) +#define NPU3_NTL_CREQ_HDR_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x400) +#define NPU3_NTL_PRB_HDR_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x410) +#define NPU3_NTL_ATR_HDR_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x418) +#define NPU3_NTL_RSP_HDR_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x428) +#define NPU3_NTL_CREQ_DAT_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x430) +#define NPU3_NTL_RSP_DAT_CRED_SND(brk) (NPU3_BLOCK_NTL_U(brk) + 0x438) +#define NPU3_NTL_CREQ_HDR_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x440) +#define NPU3_NTL_DGD_HDR_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x448) +#define NPU3_NTL_ATSD_HDR_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x460) +#define NPU3_NTL_RSP_HDR_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x468) +#define NPU3_NTL_CREQ_DAT_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x470) +#define NPU3_NTL_RSP_DAT_CRED_RCV(brk) (NPU3_BLOCK_NTL_U(brk) + 0x478) +#define NPU3_NTL_CQ_FENCE_STATUS(brk) (NPU3_BLOCK_NTL_U(brk) + 0x500) +#define NPU3_NTL_CQ_FENCE_STATUS_FIELD PPC_BITMASK(0, 1) +#define NPU3_NTL_CQ_FENCE_STATUS_FULL 3 +#define NPU3_NTL_CQ_FENCE_STATUS_HALF 2 +#define NPU3_NTL_CQ_FENCE_STATUS_NONE 0 + +/* + * CQ_SM block registers + * + * Definitions 
here use NPU3_BLOCK_CQ_SM(0), but when npu3_write() is given
+ * one of these, it will do corresponding writes to every CQ_SM block.
+ */
+#define NPU3_SM_MISC_CFG0			(NPU3_BLOCK_CQ_SM(0) + 0x000)
+#define NPU3_SM_MISC_CFG0_ENABLE_PBUS		PPC_BIT(26)
+#define NPU3_SM_MISC_CFG0_ENABLE_SNARF_CPM	PPC_BIT(27)
+#define NPU3_SM_MISC_CFG0_OCAPI_MODE		PPC_BITMASK(44, 48)
+#define NPU3_SM_MISC_CFG0_NVLINK_MODE		PPC_BITMASK(49, 53)
+#define NPU3_SM_MISC_CFG1			(NPU3_BLOCK_CQ_SM(0) + 0x008)
+#define NPU3_SM_MISC_CFG2			(NPU3_BLOCK_CQ_SM(0) + 0x0f0)
+#define NPU3_GPU_MEM_BAR(brk)			(NPU3_BLOCK_CQ_SM(0) + 0x190 + (brk) * 8)
+#define NPU3_GPU_MEM_BAR_ENABLE			PPC_BIT(0)
+#define NPU3_GPU_MEM_BAR_ADDR_MASK		PPC_BITMASK(1, 35)
+#define NPU3_GPU_MEM_BAR_ADDR			PPC_BITMASK(1, 21)
+#define NPU3_GPU_MEM_BAR_SIZE			PPC_BITMASK(22, 35)
+#define NPU3_GPU_MEM_BAR_SL_MODE		PPC_BIT(36)
+#define NPU3_GPU_MEM_BAR_4T_LIMIT		PPC_BIT(37)
+#define NPU3_GPU_MEM_BAR_4T_SELECT		PPC_BITMASK(38, 39)
+#define NPU3_GPU_MEM_BAR_MODE			PPC_BITMASK(40, 43)
+#define NPU3_GPU_MEM_BAR_POISON			PPC_BIT(45)
+#define NPU3_GPU_MEM_BAR_CHIP_EQ_GROUP		PPC_BIT(49)
+#define NPU3_NTL_BAR(brk)			(NPU3_BLOCK_CQ_SM(0) + 0x1b8 + (brk) * 8)
+#define NPU3_NTL_BAR_ENABLE			PPC_BIT(0)
+#define NPU3_NTL_BAR_ADDR			PPC_BITMASK(3, 35)
+#define NPU3_NTL_BAR_SIZE			PPC_BITMASK(39, 43)
+#define NPU3_NTL_BAR_SIZE_128K			1
+#define NPU3_MMIO_BAR				(NPU3_BLOCK_CQ_SM(0) + 0x1e0)
+#define NPU3_MMIO_BAR_ENABLE			PPC_BIT(0)
+#define NPU3_MMIO_BAR_ADDR			PPC_BITMASK(3, 27)
+#define NPU3_GENID_BAR				(NPU3_BLOCK_CQ_SM(0) + 0x1e8)
+#define NPU3_GENID_BAR_ENABLE			PPC_BIT(0)
+#define NPU3_GENID_BAR_ADDR			PPC_BITMASK(3, 32)
+#define NPU3_RELAXED_SRC(n)			(NPU3_BLOCK_CQ_SM(0) + 0x1f0 + (n) * 8)
+#define NPU3_RELAXED_SRC_MAX			4
+#define NPU3_RELAXED_SRC_TAG			PPC_BITMASK(0, 13)
+#define NPU3_RELAXED_SRC_GRPCHP			PPC_BITMASK(0, 6)
+#define NPU3_RELAXED_SRC_PEC			PPC_BITMASK(12, 13)
+#define NPU3_RELAXED_SRC_TAGMASK		PPC_BITMASK(14, 27)
+#define NPU3_RELAXED_SRC_MASK_NPU		PPC_BIT(28)
+#define NPU3_RELAXED_SRC_MASK_PCIE		PPC_BIT(29)
+#define NPU3_RELAXED_SRC_MASK_L2L3		PPC_BIT(30)
+#define NPU3_RELAXED_SRC_RDSTART		PPC_BITMASK(32, 39)
+#define NPU3_RELAXED_SRC_RDEND			PPC_BITMASK(40, 47)
+#define NPU3_RELAXED_SRC_WRSTART		PPC_BITMASK(48, 55)
+#define NPU3_RELAXED_SRC_WREND			PPC_BITMASK(56, 63)
+#define NPU3_RELAXED_CFG2(brk)			(NPU3_BLOCK_CQ_SM(0) + 0x230 + (brk) * 8)
+#define NPU3_RELAXED_CFG2_CMD_CL_DMA_W		PPC_BIT(0)
+#define NPU3_RELAXED_CFG2_CMD_CL_DMA_W_HP	PPC_BIT(1)
+#define NPU3_RELAXED_CFG2_CMD_CL_DMA_INJ	PPC_BIT(2)
+#define NPU3_RELAXED_CFG2_CMD_PR_DMA_INJ	PPC_BIT(3)
+#define NPU3_RELAXED_CFG2_CMD_DMA_PR_W		PPC_BIT(4)
+#define NPU3_RELAXED_CFG2_CMD_CL_RD_NC_F0	PPC_BIT(5)
+#define NPU3_RELAXED_CFG2_SRC_WRENA(src)	PPC_BIT(32 + (src) * 4)
+#define NPU3_RELAXED_CFG2_SRC_RDENA(src)	PPC_BIT(33 + (src) * 4)
+#define NPU3_RELAXED_CFG2_SRC_AWENA(src)	PPC_BIT(34 + (src) * 4)
+#define NPU3_RELAXED_CFG2_SRC_ARENA(src)	PPC_BIT(35 + (src) * 4)
+
+/* CQ_CTL block registers */
+#define NPU3_CTL_MISC_CFG0			(NPU3_BLOCK_CQ_CTL + 0x000)
+#define NPU3_CTL_MISC_CFG1			(NPU3_BLOCK_CQ_CTL + 0x008)
+#define NPU3_CTL_MISC_CFG2			(NPU3_BLOCK_CQ_CTL + 0x010)
+#define NPU3_CTL_MISC_CFG2_OCAPI_MODE		PPC_BITMASK(0, 4)
+#define NPU3_CTL_MISC_CFG2_NVLINK_MODE		PPC_BITMASK(5, 9)
+#define NPU3_CTL_MISC_CFG3			(NPU3_BLOCK_CQ_CTL + 0x018)
+#define NPU3_CTL_BDF2PE_CFG(n)			(NPU3_BLOCK_CQ_CTL + 0x180 + (n) * 8)
+#define NPU3_CTL_BDF2PE_CFG_ENABLE		PPC_BIT(0)
+#define NPU3_CTL_BDF2PE_CFG_PE			PPC_BITMASK(4, 7)
+#define NPU3_CTL_BDF2PE_CFG_BDF			PPC_BITMASK(8, 23)
+
+/* CQ_DAT block registers */
+#define NPU3_DAT_MISC_CFG1			(NPU3_BLOCK_CQ_DAT + 0x008)
+#define NPU3_DAT_MISC_CFG1_OCAPI_MODE		PPC_BITMASK(40, 44)
+#define NPU3_DAT_MISC_CFG1_NVLINK_MODE		PPC_BITMASK(45, 49)
+
+/* NTL block registers */
+#define NPU3_NTL_MISC_CFG2(brk)			(NPU3_BLOCK_NTL(brk) + 0x000)
+#define NPU3_NTL_MISC_CFG2_BRICK_ENABLE		PPC_BIT(0)
+#define NPU3_NTL_MISC_CFG2_NDL_RX_PARITY_ENA	PPC_BIT(16)
+#define NPU3_NTL_MISC_CFG2_NDL_TX_PARITY_ENA	PPC_BIT(17)
+#define NPU3_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA	PPC_BIT(18)
+#define NPU3_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA	PPC_BIT(19)
+#define NPU3_NTL_PRI_CFG(brk)			(NPU3_BLOCK_NTL(brk) + 0x0b0)
+#define NPU3_NTL_PRI_CFG_NDL			PPC_BITMASK(1, 2)
+
+/* NPU_ATS block registers */
+#define NPU3_ATS_IODA_ADDR			(NPU3_BLOCK_NPU_ATS + 0x108)
+#define NPU3_ATS_IODA_ADDR_AUTO_INC		PPC_BIT(0)
+#define NPU3_ATS_IODA_ADDR_TBL_SEL		PPC_BITMASK(11, 15)
+#define NPU3_ATS_IODA_ADDR_TBL_TVT		9
+#define NPU3_ATS_IODA_ADDR_TBL_ADDR		PPC_BITMASK(54, 63)
+#define NPU3_ATS_IODA_DATA			(NPU3_BLOCK_NPU_ATS + 0x110)
+#define NPU3_ATS_IODA_TVT_XLAT_ADDR		PPC_BITMASK(0, 47)
+#define NPU3_ATS_IODA_TVT_TABLE_LEVEL		PPC_BITMASK(48, 50)
+#define NPU3_ATS_IODA_TVT_TABLE_SIZE		PPC_BITMASK(51, 55)
+#define NPU3_ATS_IODA_TVT_PAGE_SIZE		PPC_BITMASK(59, 63)
+#define NPU3_ATS_TCE_KILL			(NPU3_BLOCK_NPU_ATS + 0x120)
+#define NPU3_ATS_TCE_KILL_ALL			PPC_BIT(0)
+#define NPU3_ATS_TCE_KILL_ONE			PPC_BIT(2)
+#define NPU3_ATS_TCE_KILL_PE_NUMBER		PPC_BITMASK(4, 7)
+#define NPU3_ATS_TCE_KILL_ADDRESS		PPC_BITMASK(15, 51)
+
+/* NPU_XTS block registers */
+#define NPU3_XTS_CFG				(NPU3_BLOCK_NPU_XTS + 0x020)
+#define NPU3_XTS_CFG_MMIOSD			PPC_BIT(1)
+#define NPU3_XTS_CFG_TRY_ATR_RO			PPC_BIT(6)
+#define NPU3_XTS_CFG_OPENCAPI			PPC_BIT(15)
+#define NPU3_XTS_CFG2				(NPU3_BLOCK_NPU_XTS + 0x028)
+#define NPU3_XTS_CFG2_NO_FLUSH_ENA		PPC_BIT(49)
+#define NPU3_XTS_CFG2_XSL2_ENA			PPC_BIT(55)
+#define NPU3_XTS_CFG3				(NPU3_BLOCK_NPU_XTS + 0x068)
+#define NPU3_XTS_ATSD_HYP(n)			(NPU3_BLOCK_NPU_XTS + 0x100 + (n) * 8)
+#define NPU3_XTS_ATSD_HYP_MSR_HV		PPC_BIT(51)
+#define NPU3_XTS_ATSD_HYP_LPARID		PPC_BITMASK(52, 63)
+#define NPU3_XTS_BDF_MAP(n)			(NPU3_BLOCK_NPU_XTS + 0x4000 + (n) * 8)
+#define NPU3_XTS_BDF_MAP_MAX			16
+#define NPU3_XTS_BDF_MAP_VALID			PPC_BIT(0)
+#define NPU3_XTS_BDF_MAP_UNFILT			PPC_BIT(1)
+#define NPU3_XTS_BDF_MAP_STACK			PPC_BITMASK(4, 6)
+#define NPU3_XTS_BDF_MAP_BRICK			PPC_BITMASK(7, 9)
+#define NPU3_XTS_BDF_MAP_BDF			PPC_BITMASK(16, 31)
+#define NPU3_XTS_BDF_MAP_XLAT			PPC_BITMASK(39, 40)
+#define NPU3_XTS_BDF_MAP_LPCR_PS		PPC_BITMASK(41, 43)
+#define NPU3_XTS_BDF_MAP_LPCR_ISL		PPC_BIT(44)
+#define NPU3_XTS_BDF_MAP_LPCR_TC		PPC_BIT(45)
+#define NPU3_XTS_BDF_MAP_LPCR_SC		PPC_BIT(46)
+#define NPU3_XTS_BDF_MAP_LPCR_BOT		PPC_BIT(47)
+#define NPU3_XTS_BDF_MAP_LPARSHORT		PPC_BITMASK(48, 51)
+#define NPU3_XTS_BDF_MAP_LPARID			PPC_BITMASK(52, 63)
+#define NPU3_XTS_PID_MAP(n)			(NPU3_BLOCK_NPU_XTS + 0x8000 + (n) * 32)
+#define NPU3_XTS_PID_MAP_VALID_ATRGPA0		PPC_BIT(0)
+#define NPU3_XTS_PID_MAP_VALID_ATRGPA1		PPC_BIT(1)
+#define NPU3_XTS_PID_MAP_VALID_ATSD		PPC_BIT(2)
+#define NPU3_XTS_PID_MAP_MSR			PPC_BITMASK(25, 31)
+#define NPU3_XTS_PID_MAP_MSR_DR			PPC_BIT(25)
+#define NPU3_XTS_PID_MAP_MSR_TA			PPC_BIT(26)
+#define NPU3_XTS_PID_MAP_MSR_HV			PPC_BIT(27)
+#define NPU3_XTS_PID_MAP_MSR_PR			PPC_BIT(28)
+#define NPU3_XTS_PID_MAP_MSR_US			PPC_BIT(29)
+#define NPU3_XTS_PID_MAP_MSR_SF			PPC_BIT(30)
+#define NPU3_XTS_PID_MAP_MSR_UV			PPC_BIT(31)
+#define NPU3_XTS_PID_MAP_LPARSHORT		PPC_BITMASK(40, 43)
+#define NPU3_XTS_PID_MAP_PID			PPC_BITMASK(44, 63)
+
+/* NPU_MISC block registers */
+#define NPU3_MISC_CFG				(NPU3_BLOCK_NPU_MISC + 0x030)
+#define NPU3_MISC_CFG_IPI_PS			PPC_BIT(11)
+#define NPU3_MISC_CFG_IPI_PS_64K		1
+#define NPU3_MISC_CFG_IPI_OS			PPC_BIT(12)
+#define NPU3_MISC_CFG_IPI_OS_AIX		0
+#define NPU3_MISC_CFG_IPI_OS_LINUX		1
+#define NPU3_MISC_INT_BAR			(NPU3_BLOCK_NPU_MISC + 0x098)
+#define NPU3_MISC_INT_BAR_ADDR			PPC_BITMASK(0, 39)
+#define NPU3_MISC_BDF2PE_CFG(n)			(NPU3_BLOCK_NPU_MISC + 0x100 + (n) * 8)
+#define NPU3_MISC_BDF2PE_CFG_ENABLE		PPC_BIT(0)
+#define NPU3_MISC_BDF2PE_CFG_PE			PPC_BITMASK(4, 7)
+#define NPU3_MISC_BDF2PE_CFG_BDF		PPC_BITMASK(8, 23)
+#define NPU3_MISC_PESTB_DATA(pe)		(NPU3_BLOCK_NPU_MISC + 0x200 + (pe) * 8)
+#define NPU3_MISC_PESTB_DATA_DMA_STOPPED_STATE	PPC_BIT(0)
+
+/* NPU_XTS_ATSD block registers */
+#define NPU3_XTS_ATSD_LAUNCH(n)			(NPU3_BLOCK_NPU_XTS_ATSD(n) + 0x000)
+
+#endif /* __NPU3_REGS_H */
diff --git a/include/npu3.h
b/include/npu3.h
new file mode 100644
index 0000000..1c657f9
--- /dev/null
+++ b/include/npu3.h
@@ -0,0 +1,180 @@
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NPU3_H
+#define __NPU3_H
+
+#include <phys-map.h>
+#include <pci.h>
+#include <npu3-regs.h>
+
+enum npu3_dev_type {
+	NPU3_DEV_TYPE_UNKNOWN = 0,
+	NPU3_DEV_TYPE_NVLINK,
+	NPU3_DEV_TYPE_ANY = INT_MAX	/* passed by npu3_for_each_dev() */
+};
+
+/* Information about a currently running hw procedure */
+struct npu3_procedure {
+	uint16_t		number;
+	uint16_t		step;
+	uint32_t		status;
+	unsigned long		timeout;
+};
+
+/* Used to expose a hardware BAR (or logical slice of it) outside skiboot */
+struct npu3_bar {
+	bool			enable;
+	uint64_t		addr;
+	uint64_t		size;
+	uint64_t		trap;
+};
+
+struct npu3_dev_nvlink {
+	/*
+	 * PCI virtual device. BDFN is allocated based on NPU association.
+	 * Links connected to the same NPU will be exposed as different
+	 * functions of the same bus/device.
+	 */
+	struct pci_virt_device	*pvd;
+
+	/* The PCI device created from pvd */
+	const char		*loc_code;
+	struct pci_device	*pd;
+
+	/* The associated GPU device */
+	struct pci_device	*gpu;
+};
+
+struct npu3_dev {
+	enum npu3_dev_type	type;
+	uint32_t		index;
+	struct dt_node		*dn;
+	struct npu3		*npu;
+	struct npu3_procedure	proc;
+	uint64_t		link_speed;
+
+	struct npu3_bar		ntl_bar;
+	struct npu3_bar		genid_bar;
+
+	/* Associated PHY information */
+	uint32_t		ob_chiplet;
+	uint32_t		phy_lane_mask;
+
+	/* For NPU3_DEV_TYPE_NVLINK */
+	struct npu3_dev_nvlink	nvlink;
+};
+
+struct npu3_nvlink {
+	struct phb		phb;
+	uint32_t		ctx_ref[NPU3_XTS_BDF_MAP_MAX];
+};
+
+#define NPU3_LINKS_PER_NPU	4
+
+struct npu3 {
+	uint32_t		index;
+	struct dt_node		*dt_node;
+	uint32_t		chip_id;
+	uint64_t		xscom_base;
+
+	/* Global MMIO window (all NPU regs) */
+	uint64_t		regs[2];
+
+	uint32_t		irq_base;
+	struct lock		lock;
+	bool			tx_zcal_complete;
+
+	struct npu3_dev		devices[NPU3_LINKS_PER_NPU];
+
+	/* Shared by any NPU3_DEV_TYPE_NVLINK devices */
+	struct npu3_nvlink	nvlink;
+};
+
+static inline struct npu3 *npu3_phb_to_npu(struct phb *phb)
+{
+	assert(phb->phb_type == phb_type_npu_v3);
+	return container_of(phb, struct npu3, nvlink.phb);
+}
+
+/* Chip-scope index of the link */
+static inline uint32_t npu3_chip_dev_index(struct npu3_dev *dev)
+{
+	return dev->npu->index * NPU3_LINKS_PER_NPU + dev->index;
+}
+
+struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev,
+			       enum npu3_dev_type type);
+
+#define npu3_for_each_dev_type(dev, npu, type) \
+	for (dev = NULL; (dev = npu3_next_dev(npu, dev, type));)
+
+#define npu3_for_each_nvlink_dev(dev, npu) \
+	npu3_for_each_dev_type(dev, npu, NPU3_DEV_TYPE_NVLINK)
+
+#define npu3_for_each_dev(dev, npu) \
+	npu3_for_each_dev_type(dev, npu, NPU3_DEV_TYPE_ANY)
+
+struct npu3 *npu3_next_nvlink_npu(struct npu3 *npu, uint32_t chip_id);
+
+#define npu3_for_each_chip_nvlink_npu(npu, chip_id) \
+	for (npu = NULL; (npu = npu3_next_nvlink_npu(npu, chip_id));)
+
+#define NPU3_ANY_CHIP	INT_MAX
+#define npu3_for_each_nvlink_npu(npu) \
+	npu3_for_each_chip_nvlink_npu(npu, NPU3_ANY_CHIP)
+
+void npu3_init_nvlink(struct npu3 *npu);
+void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable);
+int64_t npu3_dev_reset(struct npu3_dev *dev);
+
+uint32_t npu3_chip_possible_gpus(void);
+int32_t npu3_dev_gpu_index(struct npu3_dev *dev);
+
+/* NPU RING register access */
+void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val);
+uint64_t npu3_read(struct npu3 *npu, uint64_t reg);
+void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val);
+uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg);
+
+/* Link flags */
+#define NPU3_DEV_PCI_LINKED	0x1
+#define NPU3_DEV_DL_RESET	0x2
+
+void npu3_pvd_flag_set(struct npu3_dev *dev, uint8_t flag);
+void npu3_pvd_flag_clear(struct npu3_dev *dev, uint8_t flag);
+
+/* PHY procedures (flag bits are unsigned: 1 << 31 overflows a signed int) */
+#define NPU3_PROC_STATUS_MASK	0xc000000f
+#define NPU3_PROC_INPROGRESS	(1u << 31)
+#define NPU3_PROC_COMPLETE	(1u << 30)
+#define NPU3_PROC_NEXT		(1u << 29)
+#define NPU3_PROC_FAILED	2
+#define NPU3_PROC_ABORTED	3
+#define NPU3_PROC_UNSUPPORTED	4
+
+void npu3_dev_procedure_init(struct npu3_dev *dev, uint32_t pnum);
+uint32_t npu3_dev_procedure_status(struct npu3_dev *dev);
+
+/* OPAL entry points */
+int64_t npu3_init_context(struct phb *phb, uint64_t msr, uint64_t bdf);
+int64_t npu3_destroy_context(struct phb *phb, uint64_t bdf);
+int64_t npu3_map_lpar(struct phb *phb, uint64_t bdf, uint64_t lparid,
+		      uint64_t lpcr);
+int64_t npu3_set_relaxed_order(struct phb *phb, uint32_t gcid, int pec,
+			       bool enable);
+
+#endif /* __NPU3_H */
diff --git a/include/pci.h b/include/pci.h
index c7c15d0..b840409 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -353,6 +353,7 @@ enum phb_type {
 	phb_type_pcie_v4,
 	phb_type_npu_v2,
 	phb_type_npu_v2_opencapi,
+	phb_type_npu_v3,
 };


diff --git a/include/platform.h b/include/platform.h
index ee22752..0b04385 100644
--- a/include/platform.h
+++ b/include/platform.h
@@
-10,6 +10,7 @@ struct pci_device;
 struct pci_slot;
 struct errorlog;
 struct npu2;
+struct npu3;

 enum resource_id {
 	RESOURCE_ID_KERNEL,
@@ -115,8 +116,9 @@ struct platform {
 	/* OpenCAPI platform-specific I2C information */
 	const struct platform_ocapi *ocapi;

-	/* NPU2 device detection */
+	/* NPU device detection */
 	void		(*npu2_device_detect)(struct npu2 *npu);
+	void		(*npu3_device_detect)(struct npu3 *npu);

 	/*
 	 * Probe platform, return true on a match, called before
diff --git a/include/skiboot.h b/include/skiboot.h
index 9b7942f..6cac1cf 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -195,6 +195,7 @@ extern int preload_capp_ucode(void);
 extern void preload_io_vpd(void);
 extern void probe_npu(void);
 extern void probe_npu2(void);
+extern void probe_npu3(void);
 extern void uart_init(void);
 extern void mbox_init(void);
 extern void early_uart_init(void);
diff --git a/include/xscom-p9-regs.h b/include/xscom-p9-regs.h
index b322605..15bdc6f 100644
--- a/include/xscom-p9-regs.h
+++ b/include/xscom-p9-regs.h
@@ -87,4 +87,23 @@
 #define EC_PPM_SPECIAL_WKUP_OCC		0x010C
 #define EC_PPM_SPECIAL_WKUP_HYP		0x010D

+#define OB_BASE(ob)			(((ob) + 9) << 24)
+#define OB_CPLT_CONF1(ob)		(OB_BASE(ob) + 0x9)
+#define OB_CPLT_CONF1_NV_IOVALID(brk)	PPC_BIT(6 + (brk))
+#define OB_INDIRECT(ob)			((OB_BASE(ob) + 0x10c3f) | PPC_BIT(0))
+
+/* PPE SRAM: Indirect address/data port */
+#define OB_PPE_CSAR(ob)			(OB_BASE(ob) + 0x1104d)
+#define OB_PPE_CSAR_SRAM_ADDR		PPC_BITMASK(16, 28)
+#define OB_PPE_CSDR(ob)			(OB_BASE(ob) + 0x1104e)
+
+/* PPE SRAM: Indirect registers */
+#define OB_PPE_SALT_CMD			0x1fe6
+#define OB_PPE_SALT_CMD_READY		PPC_BIT(0)
+#define OB_PPE_SALT_CMD_RW		PPC_BIT(1)
+#define OB_PPE_SALT_CMD_ERR		PPC_BIT(2)
+#define OB_PPE_SALT_CMD_LINKNUM		PPC_BITMASK(15, 18)
+#define OB_PPE_SALT_CMD_REG		PPC_BITMASK(19, 31)
+#define OB_PPE_SALT_CMD_DATA		PPC_BITMASK(32, 63)
+
 #endif /* __XSCOM_P9_REGS_H__ */
|