Diffstat (limited to 'hw')
-rw-r--r--   hw/Makefile.inc             3
-rw-r--r--   hw/npu-opal.c              17
-rw-r--r--   hw/npu3-hw-procedures.c   792
-rw-r--r--   hw/npu3-nvlink.c         1828
-rw-r--r--   hw/npu3.c                 549
5 files changed, 3 insertions, 3186 deletions
diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index 37256d3..c254fcb 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -8,8 +8,7 @@ HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o
HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o
-HW_OBJS += npu-opal.o npu3.o npu3-nvlink.o npu3-hw-procedures.o
-HW_OBJS += ocmb.o xive2.o
+HW_OBJS += npu-opal.o ocmb.o xive2.o
HW=hw/built-in.a
include $(SRC)/hw/fsp/Makefile.inc
diff --git a/hw/npu-opal.c b/hw/npu-opal.c
index 412ea46..c7f5f9f 100644
--- a/hw/npu-opal.c
+++ b/hw/npu-opal.c
@@ -7,7 +7,6 @@
#include <pci.h>
#include <phb4.h>
#include <npu2.h>
-#include <npu3.h>
static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
uint64_t msr, uint64_t bdf)
@@ -20,9 +19,6 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
if (phb->phb_type == phb_type_npu_v2)
return npu2_init_context(phb, msr, bdf);
- if (phb->phb_type == phb_type_npu_v3)
- return npu3_init_context(phb, msr, bdf);
-
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_INIT_CONTEXT, opal_npu_init_context, 4);
@@ -38,9 +34,6 @@ static int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
if (phb->phb_type == phb_type_npu_v2)
return npu2_destroy_context(phb, bdf);
- if (phb->phb_type == phb_type_npu_v3)
- return npu3_destroy_context(phb, bdf);
-
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_DESTROY_CONTEXT, opal_npu_destroy_context, 3);
@@ -56,9 +49,6 @@ static int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
if (phb->phb_type == phb_type_npu_v2)
return npu2_map_lpar(phb, bdf, lparid, lpcr);
- if (phb->phb_type == phb_type_npu_v3)
- return npu3_map_lpar(phb, bdf, lparid, lpcr);
-
return OPAL_PARAMETER;
}
opal_call(OPAL_NPU_MAP_LPAR, opal_npu_map_lpar, 4);
@@ -89,13 +79,10 @@ static int64_t npu_set_relaxed_order(uint32_t gcid, int pec, bool enable)
int64_t rc;
for_each_phb(phb) {
- if (phb->phb_type == phb_type_npu_v2)
- rc = npu2_set_relaxed_order(phb, gcid, pec, enable);
- else if (phb->phb_type == phb_type_npu_v3)
- rc = npu3_set_relaxed_order(phb, gcid, pec, enable);
- else
+ if (phb->phb_type != phb_type_npu_v2)
continue;
+ rc = npu2_set_relaxed_order(phb, gcid, pec, enable);
if (rc)
return rc;
}
diff --git a/hw/npu3-hw-procedures.c b/hw/npu3-hw-procedures.c
deleted file mode 100644
index 098e6e4..0000000
--- a/hw/npu3-hw-procedures.c
+++ /dev/null
@@ -1,792 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-/*
- * Copyright 2019 IBM Corp.
- */
-
-#include <skiboot.h>
-#include <npu3.h>
-#include <npu3-regs.h>
-#include <timebase.h>
-#include <xscom.h>
-#include <xscom-p9-regs.h>
-
-#define NPU3DEVLOG(l, dev, fmt, a...) \
- prlog(l, "NPU[%d:%d:%d]: " fmt, \
- (dev)->npu->chip_id, \
- (dev)->npu->index, \
- (dev)->index, ##a)
-#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
-#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
-#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
-
-/*
- * The documentation for the PHY training is written in terms of bits within an
- * actual register so we use that representation here.
- */
-struct npu3_phy_reg {
- uint64_t offset;
- uint64_t mask;
-};
-
-static struct npu3_phy_reg
-NPU3_PHY_RX_RUN_LANE = { 0x0c8, PPC_BIT(48) },
-NPU3_PHY_RX_IORESET = { 0x096, PPC_BIT(63) },
-NPU3_PHY_TX_IORESET = { 0x113, PPC_BIT(48) },
-NPU3_PHY_RX_PR_RESET = { 0x096, PPC_BIT(62) },
-NPU3_PHY_RX_LANE_ANA_PDWN = { 0x002, PPC_BIT(54) },
-NPU3_PHY_RX_LANE_DIG_PDWN = { 0x088, PPC_BIT(48) },
-NPU3_PHY_RX_PR_PHASE_STEP = { 0x08a, PPC_BITMASK(60, 63) },
-NPU3_PHY_TX_LANE_PDWN = { 0x101, PPC_BIT(48) },
-NPU3_PHY_RX_RUN_DCCAL = { 0x0c8, PPC_BIT(49) },
-NPU3_PHY_RX_DCCAL_DONE = { 0x0ca, PPC_BIT(49) },
-NPU3_PHY_RX_LANE_BUSY = { 0x0ca, PPC_BIT(50) },
-NPU3_PHY_RX_B_BANK_CONTROLS = { 0x002, PPC_BITMASK(58, 63) },
-NPU3_PHY_TX_UNLOAD_CLK_DISABLE = { 0x103, PPC_BIT(56) },
-NPU3_PHY_TX_FIFO_INIT = { 0x105, PPC_BIT(53) },
-NPU3_PHY_TX_RXCAL = { 0x103, PPC_BIT(57) },
-NPU3_PHY_RX_INIT_DONE = { 0x0ca, PPC_BIT(48) },
-NPU3_PHY_RX_PR_EDGE_TRACK_CNTL = { 0x092, PPC_BITMASK(48, 49) },
-NPU3_PHY_RX_PR_FW_OFF = { 0x08a, PPC_BIT(56) },
-NPU3_PHY_RX_PR_FW_INERTIA_AMT = { 0x08a, PPC_BITMASK(57, 59) },
-NPU3_PHY_RX_CFG_LTE_MC = { 0x000, PPC_BITMASK(60, 63) },
-NPU3_PHY_RX_A_INTEG_COARSE_GAIN = { 0x00a, PPC_BITMASK(48, 51) },
-NPU3_PHY_RX_B_INTEG_COARSE_GAIN = { 0x026, PPC_BITMASK(48, 51) },
-NPU3_PHY_RX_E_INTEG_COARSE_GAIN = { 0x030, PPC_BITMASK(48, 51) },
-
-/* These registers are per-PHY, not per lane */
-NPU3_PHY_TX_ZCAL_SWO_EN = { 0x3c9, PPC_BIT(48) },
-NPU3_PHY_TX_ZCAL_REQ = { 0x3c1, PPC_BIT(49) },
-NPU3_PHY_TX_ZCAL_DONE = { 0x3c1, PPC_BIT(50) },
-NPU3_PHY_TX_ZCAL_ERROR = { 0x3c1, PPC_BIT(51) },
-NPU3_PHY_TX_ZCAL_N = { 0x3c3, PPC_BITMASK(48, 56) },
-NPU3_PHY_TX_ZCAL_P = { 0x3c5, PPC_BITMASK(48, 56) },
-NPU3_PHY_TX_PSEG_PRE_EN = { 0x34d, PPC_BITMASK(51, 55) },
-NPU3_PHY_TX_PSEG_PRE_SELECT = { 0x34d, PPC_BITMASK(56, 60) },
-NPU3_PHY_TX_NSEG_PRE_EN = { 0x34f, PPC_BITMASK(51, 55) },
-NPU3_PHY_TX_NSEG_PRE_SELECT = { 0x34f, PPC_BITMASK(56, 60) },
-NPU3_PHY_TX_PSEG_POST_EN = { 0x361, PPC_BITMASK(49, 55) },
-NPU3_PHY_TX_PSEG_POST_SELECT = { 0x361, PPC_BITMASK(56, 62) },
-NPU3_PHY_TX_NSEG_POST_EN = { 0x363, PPC_BITMASK(49, 55) },
-NPU3_PHY_TX_NSEG_POST_SELECT = { 0x363, PPC_BITMASK(56, 62) },
-NPU3_PHY_TX_PSEG_MARGINPU_EN = { 0x351, PPC_BITMASK(48, 55) },
-NPU3_PHY_TX_NSEG_MARGINPU_EN = { 0x353, PPC_BITMASK(48, 55) },
-NPU3_PHY_TX_PSEG_MARGINPD_EN = { 0x351, PPC_BITMASK(56, 63) },
-NPU3_PHY_TX_NSEG_MARGINPD_EN = { 0x353, PPC_BITMASK(56, 63) },
-NPU3_PHY_TX_MARGINPU_SELECT = { 0x355, PPC_BITMASK(48, 55) },
-NPU3_PHY_TX_MARGINPD_SELECT = { 0x355, PPC_BITMASK(56, 63) },
-NPU3_PHY_TX_PSEG_MAIN_EN = { 0x357, PPC_BITMASK(51, 57) },
-NPU3_PHY_TX_NSEG_MAIN_EN = { 0x359, PPC_BITMASK(51, 57) },
-NPU3_PHY_RX_CLKDIST_PDWN = { 0x204, PPC_BITMASK(48, 50) },
-NPU3_PHY_RX_IREF_PDWN = { 0x230, PPC_BIT(54) },
-NPU3_PHY_TX_CLKDIST_PDWN = { 0x305, PPC_BITMASK(48, 50) },
-NPU3_PHY_RX_CTL_DATASM_CLKDIST_PDWN = { 0x2e0, PPC_BIT(60) };
-
-static uint64_t npu3_phy_scom(struct npu3_dev *dev, struct npu3_phy_reg *reg,
- int lane)
-{
- uint64_t scom;
-
- /* Don't specify a lane for a non-per-lane register */
- if (lane >= 0)
- assert(reg->offset < 0x200);
- else
- assert(reg->offset >= 0x200);
-
- scom = OB_INDIRECT(dev->ob_chiplet);
- scom = SETFIELD(PPC_BITMASK(12, 21), scom, reg->offset);
-
- if (lane > 0)
- scom = SETFIELD(PPC_BITMASK(27, 31), scom, lane);
-
- return scom;
-}
-
-static void npu3_phy_write_lane(struct npu3_dev *dev, struct npu3_phy_reg *reg,
- int lane, uint64_t val)
-{
- struct npu3 *npu = dev->npu;
- uint64_t scom, scom_val;
-
- scom = npu3_phy_scom(dev, reg, lane);
-
- xscom_read(npu->chip_id, scom, &scom_val);
- scom_val = SETFIELD(reg->mask, scom_val, val);
- xscom_write(npu->chip_id, scom, scom_val);
-}
-
-static uint64_t npu3_phy_read_lane(struct npu3_dev *dev,
- struct npu3_phy_reg *reg,
- int lane)
-{
- struct npu3 *npu = dev->npu;
- uint64_t scom, scom_val;
-
- scom = npu3_phy_scom(dev, reg, lane);
- xscom_read(npu->chip_id, scom, &scom_val);
-
- return GETFIELD(reg->mask, scom_val);
-}
-
-static inline void npu3_phy_write(struct npu3_dev *dev,
- struct npu3_phy_reg *reg,
- uint64_t val)
-{
- npu3_phy_write_lane(dev, reg, -1, val);
-}
-
-static inline uint64_t npu3_phy_read(struct npu3_dev *dev,
- struct npu3_phy_reg *reg)
-{
- return npu3_phy_read_lane(dev, reg, -1);
-}
-
-struct procedure {
- const char *name;
- uint32_t (*steps[])(struct npu3_dev *);
-};
-
-#define DEFINE_PROCEDURE(NAME, STEPS...) \
-static struct procedure procedure_##NAME = { \
- .name = #NAME, \
- .steps = { NAME, ##STEPS } \
-}
-
-static uint32_t stop(struct npu3_dev *npu_dev __unused)
-{
- return NPU3_PROC_COMPLETE | NPU3_PROC_ABORTED;
-}
-
-DEFINE_PROCEDURE(stop);
-
-static uint32_t nop(struct npu3_dev *npu_dev __unused)
-{
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(nop);
-
-static void set_iovalid(struct npu3_dev *dev, bool raise)
-{
- struct npu3 *npu = dev->npu;
- uint64_t reg, val;
-
- reg = OB_CPLT_CONF1(dev->ob_chiplet);
-
- xscom_read(npu->chip_id, reg, &val);
- val = SETFIELD(OB_CPLT_CONF1_NV_IOVALID(dev->index), val, raise);
- xscom_write(npu->chip_id, reg, val);
-}
-
-#define NPU3_PHY_LANES 24
-
-#define npu3_for_each_lane(lane, dev) \
- for (lane = 0; lane < NPU3_PHY_LANES; lane++) \
- if (dev->phy_lane_mask & PPC_BIT32(lane)) \
-
-static uint32_t phy_reset(struct npu3_dev *dev)
-{
- uint32_t lane;
-
- set_iovalid(dev, false);
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_LANE, lane, 0);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t phy_reset_wait(struct npu3_dev *dev)
-{
- int lane;
-
- /* Wait for all lanes to become inactive */
- npu3_for_each_lane(lane, dev)
- if (npu3_phy_read_lane(dev, &NPU3_PHY_RX_LANE_BUSY, lane))
- return NPU3_PROC_INPROGRESS;
-
- npu3_for_each_lane(lane, dev) {
- /* Set lane in reset */
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_IORESET, lane, 1);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_IORESET, lane, 1);
-
- /* Release lane from reset */
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_IORESET, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_IORESET, lane, 0);
-
- /* Reset the phase rotator */
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_RESET, lane, 1);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_RESET, lane, 0);
- }
-
- return NPU3_PROC_NEXT;
-}
-
-/* Procedure 1.2.3 - Initialise I/O PHY Registers */
-static uint32_t phy_reset_complete(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev) {
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_ANA_PDWN, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_DIG_PDWN, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_PHASE_STEP, lane, 0xc);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_LANE_PDWN, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_INERTIA_AMT, lane, 4);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_CFG_LTE_MC, lane, 3);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_A_INTEG_COARSE_GAIN, lane, 11);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_B_INTEG_COARSE_GAIN, lane, 11);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_E_INTEG_COARSE_GAIN, lane, 11);
- }
-
- set_iovalid(dev, true);
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(phy_reset, phy_reset_wait, phy_reset_complete);
-
-/* Procedure 1.2.6 - I/O PHY Tx Impedance Calibration */
-static uint32_t phy_tx_zcal(struct npu3_dev *dev)
-{
- if (dev->npu->tx_zcal_complete)
- return NPU3_PROC_COMPLETE;
-
- /* Turn off SW enable and enable zcal state machine */
- npu3_phy_write(dev, &NPU3_PHY_TX_ZCAL_SWO_EN, 0);
-
- /* Start impedance calibration state machine */
- npu3_phy_write(dev, &NPU3_PHY_TX_ZCAL_REQ, 1);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t phy_tx_zcal_wait(struct npu3_dev *dev)
-{
- if (npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_ERROR))
- return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
-
- if (!npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_DONE))
- return NPU3_PROC_INPROGRESS;
-
- return NPU3_PROC_NEXT;
-}
-
-#define MARGIN_RATIO 0
-#define FFE_PRE_COEFF 0
-#define FFE_POST_COEFF 0
-
-#define PRE_WIDTH 5
-#define POST_WIDTH 7
-#define MAIN_WIDTH 7
-#define ZCAL_MIN (16 * 2)
-#define ZCAL_MAX (33 * 2)
-#define PRECURSOR_X2_MAX (4 * 2 + 1)
-#define POSTCURSOR_X2_MAX (6 * 2 + 1)
-#define MARGIN_X2_MAX (8 * 2)
-#define MAIN_X2_MAX (6 * 2 + 1)
-#define TOTAL_X2_MAX (PRECURSOR_X2_MAX + POSTCURSOR_X2_MAX + \
- 2 * MARGIN_X2_MAX + MAIN_X2_MAX)
-
-static uint32_t therm(uint32_t dec)
-{
- return (0x1 << dec) - 1;
-}
-
-static uint32_t therm_with_half(uint32_t dec, uint8_t width)
-{
- /* If the LSB of the 2r equivalent is on, then we need to set the 2r bit (MSB) */
- uint32_t half_on = (dec & 0x1) << (width - 1);
-
- /* Shift the 2r equivalent to a 1r value and convert to a thermometer code. */
- uint32_t x1_equiv = ((1 << (dec >> 1)) - 1);
-
- /* Combine 1r equivalent thermometer code + the 2r MSB value. */
- return half_on | x1_equiv;
-}
-
-static uint32_t phy_tx_zcal_calculate(struct npu3_dev *dev)
-{
- int p_value, n_value;
- uint32_t zcal_n;
- uint32_t zcal_p;
- uint32_t p_main_enable = MAIN_X2_MAX;
- uint32_t p_margin_pu_enable = MARGIN_X2_MAX;
- uint32_t p_margin_pd_enable = MARGIN_X2_MAX;
- uint32_t p_precursor_select;
- uint32_t p_postcursor_select;
- uint32_t margin_pu_select;
- uint32_t n_main_enable = MAIN_X2_MAX;
- uint32_t n_margin_pu_enable = MARGIN_X2_MAX;
- uint32_t n_margin_pd_enable = MARGIN_X2_MAX;
- uint32_t n_precursor_select;
- uint32_t n_postcursor_select;
- uint32_t margin_pd_select;
- uint32_t margin_select;
-
- /* Convert the value from 8R to 2R by / 4 */
- zcal_n = npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_N) / 4;
- zcal_p = npu3_phy_read(dev, &NPU3_PHY_TX_ZCAL_P) / 4;
-
- /*
- * Again, if the hardware detects an unexpected condition it's
- * better just to fail loudly.
- */
- if (zcal_n < ZCAL_MIN || zcal_n > ZCAL_MAX ||
- zcal_p < ZCAL_MIN || zcal_p > ZCAL_MAX)
- return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
-
- p_value = zcal_p - TOTAL_X2_MAX;
- p_precursor_select = p_value * FFE_PRE_COEFF / 128;
- p_postcursor_select = p_value * FFE_POST_COEFF / 128;
- margin_pu_select = p_value * MARGIN_RATIO / 256;
-
- if (p_value % 2) {
- p_main_enable--;
- p_value++;
- }
-
- while (p_value < 0) {
- if (p_main_enable > 1) {
- p_main_enable -= 2;
- } else if (p_margin_pu_enable + p_margin_pd_enable > 0) {
- if (p_margin_pu_enable == p_margin_pd_enable)
- p_margin_pd_enable -= 2;
- else
- p_margin_pu_enable -= 2;
- }
- p_value += 2;
- }
-
- n_value = zcal_n - TOTAL_X2_MAX;
- n_precursor_select = n_value * FFE_PRE_COEFF / 128;
- n_postcursor_select = n_value * FFE_POST_COEFF / 128;
- margin_pd_select = p_value * MARGIN_RATIO / 256;
-
- if (n_value % 2) {
- n_main_enable--;
- n_value++;
- }
-
- while (n_value < 0) {
- if (n_main_enable > 1) {
- n_main_enable -= 2;
- } else if (n_margin_pu_enable + n_margin_pd_enable > 0) {
- if (n_margin_pu_enable == n_margin_pd_enable)
- n_margin_pd_enable -= 2;
- else
- n_margin_pu_enable -= 2;
- }
- n_value += 2;
- }
-
- margin_select = therm((margin_pu_select + 1) / 2) &
- therm((margin_pd_select + 1) / 2) &
- therm((p_margin_pu_enable + 1) / 2) &
- therm((p_margin_pd_enable + 1) / 2) &
- therm((n_margin_pu_enable + 1) / 2) &
- therm((n_margin_pd_enable + 1) / 2);
-
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_PRE_EN, therm_with_half(PRECURSOR_X2_MAX, PRE_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_PRE_SELECT, therm_with_half(p_precursor_select, PRE_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_POST_EN, therm_with_half(POSTCURSOR_X2_MAX, POST_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_POST_SELECT, therm_with_half(p_postcursor_select, POST_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MARGINPU_EN, therm((p_margin_pu_enable + 1) / 2));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MARGINPD_EN, therm((p_margin_pd_enable + 1) / 2));
- npu3_phy_write(dev, &NPU3_PHY_TX_PSEG_MAIN_EN, therm_with_half(p_main_enable, MAIN_WIDTH));
-
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_PRE_EN, therm_with_half(PRECURSOR_X2_MAX, PRE_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_PRE_SELECT, therm_with_half(n_precursor_select, PRE_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_POST_EN, therm_with_half(POSTCURSOR_X2_MAX, POST_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_POST_SELECT, therm_with_half(n_postcursor_select, POST_WIDTH));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MARGINPU_EN, therm((n_margin_pu_enable + 1) / 2));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MARGINPD_EN, therm((n_margin_pd_enable + 1) / 2));
- npu3_phy_write(dev, &NPU3_PHY_TX_NSEG_MAIN_EN, therm_with_half(n_main_enable, MAIN_WIDTH));
-
- npu3_phy_write(dev, &NPU3_PHY_TX_MARGINPU_SELECT, therm(margin_select + 1) / 2);
- npu3_phy_write(dev, &NPU3_PHY_TX_MARGINPD_SELECT, therm(margin_select + 1) / 2);
-
- dev->npu->tx_zcal_complete = true;
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(phy_tx_zcal, phy_tx_zcal_wait, phy_tx_zcal_calculate);
-
-/* Procedure 1.2.4 - I/O PHY DC Calibration */
-static uint32_t phy_rx_dccal(struct npu3_dev *dev)
-{
- int lane;
-
- set_iovalid(dev, false);
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_OFF, lane, 1);
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_DCCAL, lane, 1);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t phy_rx_dccal_complete(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev)
- if (!npu3_phy_read_lane(dev, &NPU3_PHY_RX_DCCAL_DONE, lane))
- return NPU3_PROC_INPROGRESS;
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_DCCAL, lane, 0);
-
- npu3_for_each_lane(lane, dev) {
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_B_BANK_CONTROLS, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_EDGE_TRACK_CNTL, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_PR_FW_OFF, lane, 0);
- }
-
- return NPU3_PROC_NEXT;
-}
-
-/* Procedure 1.2.5 - IO PHY Tx FIFO Init */
-static uint32_t phy_tx_fifo_init(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev) {
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_UNLOAD_CLK_DISABLE, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_FIFO_INIT, lane, 1);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_UNLOAD_CLK_DISABLE, lane, 1);
- }
-
- set_iovalid(dev, true);
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(phy_rx_dccal, phy_rx_dccal_complete, phy_tx_fifo_init);
-
-/* Procedure 1.2.8 - Enable Downstream Link Training */
-static uint32_t phy_enable_tx_rxcal(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_RXCAL, lane, 1);
-
- return NPU3_PROC_COMPLETE;
-}
-DEFINE_PROCEDURE(phy_enable_tx_rxcal);
-
-/* Procedure 1.2.9 - Disable Downstream Link Training */
-static uint32_t phy_disable_tx_rxcal(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_RXCAL, lane, 0);
-
- return NPU3_PROC_COMPLETE;
-}
-DEFINE_PROCEDURE(phy_disable_tx_rxcal);
-
-/* Procedure 1.2.7 - I/O PHY Upstream Link Training */
-static uint32_t phy_rx_training(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev)
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_RUN_LANE, lane, 1);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t phy_rx_training_wait(struct npu3_dev *dev)
-{
- int lane;
-
- npu3_for_each_lane(lane, dev)
- if (!npu3_phy_read_lane(dev, &NPU3_PHY_RX_INIT_DONE, lane))
- return NPU3_PROC_INPROGRESS;
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(phy_rx_training, phy_rx_training_wait);
-
-static void npu3_dev_fence_set(struct npu3_dev *dev, uint8_t state)
-{
- struct npu3 *npu = dev->npu;
- uint64_t val;
-
- val = npu3_read(npu, NPU3_NTL_MISC_CFG1(dev->index));
- val = SETFIELD(NPU3_NTL_MISC_CFG1_NTL_RESET, val, state);
- npu3_write(npu, NPU3_NTL_MISC_CFG1(dev->index), val);
-}
-
-static uint8_t npu3_dev_fence_get(struct npu3_dev *dev)
-{
- uint64_t val;
-
- val = npu3_read(dev->npu, NPU3_NTL_CQ_FENCE_STATUS(dev->index));
- return GETFIELD(NPU3_NTL_CQ_FENCE_STATUS_FIELD, val);
-}
-
-/* Procedure 1.2.1 - Reset NPU/NDL */
-static uint32_t reset_ntl(struct npu3_dev *dev)
-{
- struct npu3 *npu = dev->npu;
- uint64_t val;
- int lane;
-
- set_iovalid(dev, true);
-
- /* Power on clocks */
- npu3_phy_write(dev, &NPU3_PHY_RX_CLKDIST_PDWN, 0);
- npu3_phy_write(dev, &NPU3_PHY_RX_IREF_PDWN, 1);
- npu3_phy_write(dev, &NPU3_PHY_TX_CLKDIST_PDWN, 0);
- npu3_phy_write(dev, &NPU3_PHY_RX_CTL_DATASM_CLKDIST_PDWN, 0);
-
- npu3_for_each_lane(lane, dev) {
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_ANA_PDWN, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_RX_LANE_DIG_PDWN, lane, 0);
- npu3_phy_write_lane(dev, &NPU3_PHY_TX_LANE_PDWN, lane, 0);
- }
-
- /* Write PRI */
- val = SETFIELD(NPU3_NTL_PRI_CFG_NDL, 0ull, dev->index);
- npu3_write(npu, NPU3_NTL_PRI_CFG(dev->index), val);
-
- /* Disable parity checking */
- val = npu3_read(npu, NPU3_NTL_MISC_CFG2(dev->index));
- val &= ~(NPU3_NTL_MISC_CFG2_NDL_RX_PARITY_ENA |
- NPU3_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
- NPU3_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA);
- npu3_write(npu, NPU3_NTL_MISC_CFG2(dev->index), val);
-
- if (dev->type == NPU3_DEV_TYPE_NVLINK)
- npu3_pvd_flag_clear(dev, NPU3_DEV_DL_RESET);
-
- npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_FULL);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t reset_ndl(struct npu3_dev *dev)
-{
- struct npu3 *npu = dev->npu;
- uint64_t reg;
- uint32_t val32;
-
- if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_FULL)
- return NPU3_PROC_INPROGRESS;
-
- reg = NPU3_DLPL_CTL(dev->index);
- val32 = npu3_read_4b(npu, reg);
- val32 |= NPU3_DLPL_CTL_RESET_RX | NPU3_DLPL_CTL_RESET_MISC;
- npu3_write_4b(npu, reg, val32);
-
- val32 = npu3_read_4b(npu, reg);
- val32 &= ~(NPU3_DLPL_CTL_RESET_RX | NPU3_DLPL_CTL_RESET_MISC);
- npu3_write_4b(npu, reg, val32);
-
- reg = NPU3_DLPL_CFG(dev->index);
- val32 = NPU3_DLPL_CFG_PRI_BYTESWAP;
- npu3_write_4b(npu, reg, val32);
-
- /* Clear FIR bits */
- for (uint32_t i = 0; i < NPU3_FIR_MAX; i++)
- xscom_write(npu->chip_id, npu->xscom_base + NPU3_FIR(i), 0ull);
-
- npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_HALF);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t reset_ntl_release(struct npu3_dev *dev)
-{
- struct npu3 *npu = dev->npu;
- uint32_t i = dev->index;
-
- if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_HALF)
- return NPU3_PROC_INPROGRESS;
-
- /* Credit setup */
- npu3_write(npu, NPU3_NTL_CREQ_HDR_CRED_SND(i), 0x0200000000000000);
- npu3_write(npu, NPU3_NTL_PRB_HDR_CRED_SND(i), 0x0200000000000000);
- npu3_write(npu, NPU3_NTL_ATR_HDR_CRED_SND(i), 0x0200000000000000);
- npu3_write(npu, NPU3_NTL_RSP_HDR_CRED_SND(i), 0x0200000000000000);
- npu3_write(npu, NPU3_NTL_CREQ_DAT_CRED_SND(i), 0x1000000000000000);
- npu3_write(npu, NPU3_NTL_RSP_DAT_CRED_SND(i), 0x1000000000000000);
-
- npu3_write(npu, NPU3_NTL_CREQ_HDR_CRED_RCV(i), 0x0000be0000000000);
- npu3_write(npu, NPU3_NTL_DGD_HDR_CRED_RCV(i), 0x0000640000000000);
- npu3_write(npu, NPU3_NTL_ATSD_HDR_CRED_RCV(i), 0x0000200000000000);
- npu3_write(npu, NPU3_NTL_RSP_HDR_CRED_RCV(i), 0x0000be0000000000);
- npu3_write(npu, NPU3_NTL_CREQ_DAT_CRED_RCV(i), 0x0001000000000000);
- npu3_write(npu, NPU3_NTL_RSP_DAT_CRED_RCV(i), 0x0001000000000000);
-
- npu3_dev_fence_set(dev, NPU3_NTL_CQ_FENCE_STATUS_NONE);
-
- return NPU3_PROC_NEXT;
-}
-
-static uint32_t reset_ntl_finish(struct npu3_dev *dev) {
- struct npu3 *npu = dev->npu;
- uint64_t val;
-
- if (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_NONE)
- return NPU3_PROC_INPROGRESS;
-
- /* Enable parity checking */
- val = npu3_read(npu, NPU3_NTL_MISC_CFG2(dev->index));
- val |= NPU3_NTL_MISC_CFG2_NDL_RX_PARITY_ENA |
- NPU3_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
- NPU3_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA;
- npu3_write(npu, NPU3_NTL_MISC_CFG2(dev->index), val);
-
- if (dev->type == NPU3_DEV_TYPE_NVLINK)
- npu3_pvd_flag_set(dev, NPU3_DEV_DL_RESET);
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(reset_ntl, reset_ndl, reset_ntl_release, reset_ntl_finish);
-
-static int npu3_dev_regcmp(struct npu3_dev *dev, uint64_t reg,
- const char *reg_name, uint64_t expected)
-{
- uint64_t val;
-
- val = npu3_read(dev->npu, reg);
- if (val == expected)
- return 0;
-
- NPU3DEVERR(dev, "%s: expected 0x%llx, read 0x%llx\n",
- reg_name, expected, val);
-
- return 1;
-}
-
-#define REGCMP(reg, expected) \
- npu3_dev_regcmp(dev, reg(dev->index), #reg, expected)
-
-static uint32_t check_credits(struct npu3_dev *dev)
-{
- /* Use bitwise OR to prevent short-circuit evaluation */
- if (REGCMP(NPU3_NTL_CREQ_HDR_CRED_RCV, 0x0be0be0000000000ull) |
- REGCMP(NPU3_NTL_DGD_HDR_CRED_RCV, 0x0640640000000000ull) |
- REGCMP(NPU3_NTL_ATSD_HDR_CRED_RCV, 0x0200200000000000ull) |
- REGCMP(NPU3_NTL_RSP_HDR_CRED_RCV, 0x0be0be0000000000ull) |
- REGCMP(NPU3_NTL_CREQ_DAT_CRED_RCV, 0x1001000000000000ull) |
- REGCMP(NPU3_NTL_RSP_DAT_CRED_RCV, 0x1001000000000000ull))
- return NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
-
- return NPU3_PROC_COMPLETE;
-}
-
-DEFINE_PROCEDURE(check_credits);
-
-static struct procedure *procedures[] = {
- [0] = &procedure_stop,
- [1] = &procedure_nop,
- [4] = &procedure_phy_reset,
- [5] = &procedure_phy_tx_zcal,
- [6] = &procedure_phy_rx_dccal,
- [7] = &procedure_phy_enable_tx_rxcal,
- [8] = &procedure_phy_disable_tx_rxcal,
- [9] = &procedure_phy_rx_training,
- [10] = &procedure_reset_ntl,
- [11] = &procedure_nop, /* Placeholder for pre-terminate */
- [12] = &procedure_nop, /* Placeholder for terminate */
- [13] = &procedure_check_credits,
-};
-
-void npu3_dev_procedure_init(struct npu3_dev *dev, uint32_t pnum)
-{
- struct npu3_procedure *proc = &dev->proc;
- const char *name;
-
- if (pnum >= ARRAY_SIZE(procedures) || !procedures[pnum]) {
- NPU3DEVERR(dev, "Unsupported procedure number %d\n", pnum);
- proc->status = NPU3_PROC_COMPLETE | NPU3_PROC_UNSUPPORTED;
- return;
- }
-
- name = procedures[pnum]->name;
-
- if (proc->number == pnum && !(proc->status & NPU3_PROC_COMPLETE))
- NPU3DEVINF(dev, "Restarting procedure %s\n", name);
- else
- NPU3DEVINF(dev, "Starting procedure %s\n", name);
-
- proc->status = NPU3_PROC_INPROGRESS;
- proc->number = pnum;
- proc->step = 0;
- proc->timeout = mftb() + msecs_to_tb(1000);
-}
-
-static uint32_t npu3_dev_procedure_run_step(struct npu3_dev *dev)
-{
- struct npu3_procedure *proc = &dev->proc;
- uint32_t result;
-
- result = procedures[proc->number]->steps[proc->step](dev);
- if (result & NPU3_PROC_NEXT) {
- proc->step++;
-
- NPU3DEVINF(dev, "Running procedure %s step %d\n",
- procedures[proc->number]->name, proc->step);
- }
-
- return result;
-}
-
-static void npu3_dev_procedure_run(struct npu3_dev *dev)
-{
- struct npu3_procedure *proc = &dev->proc;
- const char *name;
- uint32_t result;
-
- do {
- result = npu3_dev_procedure_run_step(dev);
- } while (result & NPU3_PROC_NEXT);
-
- name = procedures[proc->number]->name;
-
- if (result & NPU3_PROC_COMPLETE) {
- NPU3DEVINF(dev, "Procedure %s complete\n", name);
- } else if (tb_compare(mftb(), proc->timeout) == TB_AAFTERB) {
- NPU3DEVINF(dev, "Procedure %s timed out\n", name);
- result = NPU3_PROC_COMPLETE | NPU3_PROC_FAILED;
- }
-
- /* Mask off internal state bits */
- proc->status = result & NPU3_PROC_STATUS_MASK;
-}
-
-uint32_t npu3_dev_procedure_status(struct npu3_dev *dev)
-{
- /* Run the procedure if not already complete */
- if (!(dev->proc.status & NPU3_PROC_COMPLETE))
- npu3_dev_procedure_run(dev);
-
- return dev->proc.status;
-}
-
-int64_t npu3_dev_reset(struct npu3_dev *dev)
-{
- unsigned long timeout;
-
- reset_ntl(dev);
- timeout = mftb() + msecs_to_tb(1000);
-
- while (npu3_dev_fence_get(dev) != NPU3_NTL_CQ_FENCE_STATUS_FULL) {
- if (tb_compare(mftb(), timeout) == TB_AAFTERB) {
- NPU3DEVINF(dev, "Device reset timed out\n");
- return OPAL_BUSY;
- }
- }
-
- return OPAL_SUCCESS;
-}
diff --git a/hw/npu3-nvlink.c b/hw/npu3-nvlink.c
deleted file mode 100644
index 920864b..0000000
--- a/hw/npu3-nvlink.c
+++ /dev/null
@@ -1,1828 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-/*
- * Copyright 2019 IBM Corp.
- */
-
-#include <skiboot.h>
-#include <device.h>
-#include <phys-map.h>
-#include <npu3.h>
-#include <npu3-regs.h>
-#include <pci-virt.h>
-#include <xscom.h>
-#include <xscom-p9-regs.h>
-#include <interrupts.h>
-#include <pci-cfg.h>
-#include <pci-slot.h>
-#include <cache-p9.h>
-
-#define NPU3LOG(l, npu, fmt, a...) \
- prlog(l, "NPU#%04x[%d:%d]: " fmt, \
- (npu)->nvlink.phb.opal_id, \
- (npu)->chip_id, \
- (npu)->index, ##a)
-#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a)
-#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a)
-#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a)
-
-#define NPU3DEVLOG(l, dev, fmt, a...) \
- prlog(l, "NPU#%04x:%02x:%02x.%x " fmt, \
- (dev)->npu->nvlink.phb.opal_id, \
- PCI_BUS_NUM((dev)->nvlink.pvd->bdfn), \
- PCI_DEV((dev)->nvlink.pvd->bdfn), \
- PCI_FUNC((dev)->nvlink.pvd->bdfn), ##a)
-#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
-#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
-#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
-
-#define NPU3_CFG_READ(size, type) \
-static int64_t npu3_cfg_read##size(struct phb *phb, uint32_t bdfn, \
- uint32_t offset, type *data) \
-{ \
- uint32_t val; \
- int64_t ret; \
- \
- ret = pci_virt_cfg_read(phb, bdfn, offset, \
- sizeof(*data), &val); \
- *data = (type)val; \
- return ret; \
-}
-
-#define NPU3_CFG_WRITE(size, type) \
-static int64_t npu3_cfg_write##size(struct phb *phb, uint32_t bdfn, \
- uint32_t offset, type data) \
-{ \
- uint32_t val = data; \
- int64_t ret; \
- \
- ret = pci_virt_cfg_write(phb, bdfn, offset, \
- sizeof(data), val); \
- return ret; \
-}
-
-NPU3_CFG_READ(8, u8);
-NPU3_CFG_READ(16, u16);
-NPU3_CFG_READ(32, u32);
-NPU3_CFG_WRITE(8, u8);
-NPU3_CFG_WRITE(16, u16);
-NPU3_CFG_WRITE(32, u32);
-
-static int64_t npu3_eeh_freeze_status(struct phb *phb __unused,
- uint64_t pe_num __unused,
- uint8_t *freeze_state,
- uint16_t *pci_error_type,
- uint16_t *severity)
-{
- /*
- * FIXME: When it's called by skiboot PCI config accessor,
- * the PE number is fixed to 0, which is incorrect. We need
- * introduce another PHB callback to translate it. For now,
- * it keeps the skiboot PCI enumeration going.
- */
- *freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
- *pci_error_type = OPAL_EEH_NO_ERROR;
-
- if (severity)
- *severity = OPAL_EEH_SEV_NO_ERROR;
-
- return OPAL_SUCCESS;
-}
-
-/* Number of PEs supported */
-#define NPU3_MAX_PE_NUM 16
-#define NPU3_RESERVED_PE_NUM 15
-
-static int64_t npu3_ioda_reset(struct phb *phb, bool purge __unused)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint64_t val;
-
- val = NPU3_ATS_IODA_ADDR_AUTO_INC;
- val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_SEL, val,
- NPU3_ATS_IODA_ADDR_TBL_TVT);
- npu3_write(npu, NPU3_ATS_IODA_ADDR, val);
-
- for (uint32_t i = 0; i < NPU3_MAX_PE_NUM; i++)
- npu3_write(npu, NPU3_ATS_IODA_DATA, 0ull);
-
- return OPAL_SUCCESS;
-}
-
-static inline void npu3_ioda_sel(struct npu3 *npu, uint32_t table,
- uint32_t index)
-{
- uint64_t val;
-
- val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_SEL, 0ull, table);
- val = SETFIELD(NPU3_ATS_IODA_ADDR_TBL_ADDR, val, index);
- npu3_write(npu, NPU3_ATS_IODA_ADDR, val);
-}
-
-static int64_t npu3_map_pe_dma_window(struct phb *phb,
- uint64_t pe_num,
- uint16_t window_id,
- uint16_t tce_levels,
- uint64_t tce_table_addr,
- uint64_t tce_table_size,
- uint64_t tce_page_size)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint64_t tts_encoded, val;
- uint32_t page_size;
-
- /* Each PE has one corresponding TVE */
- if (window_id != pe_num || pe_num >= NPU3_MAX_PE_NUM)
- return OPAL_PARAMETER;
-
- npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num);
-
- /* TCE table size zero is used to disable the TVE */
- if (!tce_table_size) {
- npu3_write(npu, NPU3_ATS_IODA_DATA, 0ull);
- return OPAL_SUCCESS;
- }
-
- /* TCE table size */
- if (!is_pow2(tce_table_size) || tce_table_size < 0x1000)
- return OPAL_PARAMETER;
-
- tts_encoded = ilog2(tce_table_size) - 11;
- if (tts_encoded > 39)
- return OPAL_PARAMETER;
-
- val = SETFIELD(NPU3_ATS_IODA_TVT_TABLE_SIZE, 0ull, tts_encoded);
-
- /* Number of levels */
- if (tce_levels < 1 || tce_levels > 4)
- return OPAL_PARAMETER;
-
- val = SETFIELD(NPU3_ATS_IODA_TVT_TABLE_LEVEL, val, tce_levels - 1);
-
- /* TCE page size */
- switch (tce_page_size) {
- case 256 << 20:
- page_size = 17;
- break;
- case 16 << 20:
- page_size = 13;
- break;
- case 64 << 10:
- page_size = 5;
- break;
- default:
- page_size = 1;
- }
-
- val = SETFIELD(NPU3_ATS_IODA_TVT_PAGE_SIZE, val, page_size);
- val = SETFIELD(NPU3_ATS_IODA_TVT_XLAT_ADDR, val, tce_table_addr >> 12);
- npu3_write(npu, NPU3_ATS_IODA_DATA, val);
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_map_pe_dma_window_real(struct phb *phb,
- uint64_t pe_num,
- uint16_t window_id,
- uint64_t pci_start_addr __unused,
- uint64_t pci_mem_size __unused)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint64_t val;
-
- /* Each PE has one corresponding TVE */
- if (window_id != pe_num || pe_num >= NPU3_MAX_PE_NUM)
- return OPAL_PARAMETER;
-
- if (pci_mem_size) {
- /*
- * GPUs need to be able to access the MMIO memory space as well.
- * On POWER9 this is above the top of RAM, so disable the TVT
- * range check, allowing access to all memory addresses.
- */
- val = 0;
- } else {
- /* Disable */
- val = PPC_BIT(51);
- }
-
- npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num);
- npu3_write(npu, NPU3_ATS_IODA_DATA, val);
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_next_error(struct phb *phb,
- uint64_t *first_frozen_pe,
- uint16_t *pci_error_type,
- uint16_t *severity)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint64_t val;
- uint32_t pe_num;
-
- if (!first_frozen_pe || !pci_error_type || !severity)
- return OPAL_PARAMETER;
-
- *first_frozen_pe = -1;
- *pci_error_type = OPAL_EEH_NO_ERROR;
- *severity = OPAL_EEH_SEV_NO_ERROR;
-
- for (pe_num = 0; pe_num < NPU3_MAX_PE_NUM; pe_num++) {
- val = npu3_read(npu, NPU3_MISC_PESTB_DATA(pe_num));
- if (!GETFIELD(NPU3_MISC_PESTB_DATA_DMA_STOPPED_STATE, val))
- continue;
-
- *first_frozen_pe = pe_num;
- *pci_error_type = OPAL_EEH_PE_ERROR;
- *severity = OPAL_EEH_SEV_PE_ER;
- break;
- }
-
- return OPAL_SUCCESS;
-}
-
-static struct npu3_dev *npu3_bdfn_to_dev(struct npu3 *npu, uint32_t bdfn)
-{
- struct pci_virt_device *pvd;
-
- /* All emulated devices are attached to root bus */
- if (bdfn & ~0xff)
- return NULL;
-
- pvd = pci_virt_find_device(&npu->nvlink.phb, bdfn);
- if (pvd)
- return pvd->data;
-
- return NULL;
-}
-
-static int npu3_match_gpu(struct phb *phb __unused, struct pci_device *pd,
- void *data)
-{
- const char *slot = data;
- struct dt_node *dn;
- char *loc_code;
-
- /* Ignore non-NVIDIA devices */
- if (PCI_VENDOR_ID(pd->vdid) != 0x10de)
- return 0;
-
- /* Find the PCI device's slot location */
- for (dn = pd->dn;
- dn && !dt_find_property(dn, "ibm,loc-code");
- dn = dn->parent);
-
- if (!dn)
- return 0;
-
- loc_code = (char *)dt_prop_get(dn, "ibm,loc-code");
- if (streq(loc_code, slot))
- return 1;
-
- return 0;
-}
-
-static void npu3_dev_find_gpu(struct npu3_dev *dev)
-{
- const char *slot = dev->nvlink.loc_code;
- struct phb *phb;
- struct pci_device *gpu;
-
- if (!slot)
- return;
-
- for_each_phb(phb) {
- gpu = pci_walk_dev(phb, NULL, npu3_match_gpu, (void *)slot);
- if (!gpu)
- continue;
-
- dev->nvlink.gpu = gpu;
- return;
- }
-
- NPU3DEVINF(dev, "No PCI device found for slot '%s'\n", slot);
-}
-
-#define VENDOR_CAP_START 0x80
-#define VENDOR_CAP_LINK_FLAG_OFFSET 0x0d
-
-void npu3_pvd_flag_set(struct npu3_dev *dev, uint8_t flag)
-{
- uint32_t offset = VENDOR_CAP_START + VENDOR_CAP_LINK_FLAG_OFFSET;
- uint32_t flags;
-
- PCI_VIRT_CFG_RDONLY_RD(dev->nvlink.pvd, offset, 1, &flags);
- flags |= flag;
- PCI_VIRT_CFG_INIT_RO(dev->nvlink.pvd, offset, 1, flags);
-}
-
-void npu3_pvd_flag_clear(struct npu3_dev *dev, uint8_t flag)
-{
- uint32_t offset = VENDOR_CAP_START + VENDOR_CAP_LINK_FLAG_OFFSET;
- uint32_t flags;
-
- PCI_VIRT_CFG_RDONLY_RD(dev->nvlink.pvd, offset, 1, &flags);
- flags &= ~flag;
- PCI_VIRT_CFG_INIT_RO(dev->nvlink.pvd, offset, 1, flags);
-}
-
-static struct lock npu3_phandle_lock = LOCK_UNLOCKED;
-
-static void npu3_append_phandle(struct dt_node *dn, const char *name,
- uint32_t phandle)
-{
- struct dt_property *prop;
- uint32_t *phandles;
- size_t len;
-
- prop = __dt_find_property(dn, name);
- if (!prop) {
- dt_add_property_cells(dn, name, phandle);
- return;
- }
-
- /*
- * Make sure no one else has a reference to the property. Assume
- * this is the only function that holds a reference to it.
- */
- lock(&npu3_phandle_lock);
-
- /* Need to append to the property */
- len = prop->len + sizeof(*phandles);
- dt_resize_property(&prop, len);
-
- phandles = (uint32_t *)prop->prop;
- phandles[len / sizeof(*phandles) - 1] = phandle;
-
- unlock(&npu3_phandle_lock);
-}
-
-static void npu3_dev_fixup_dt(struct npu3_dev *dev)
-{
- struct pci_device *pd = dev->nvlink.pd;
- struct pci_device *gpu = dev->nvlink.gpu;
-
- dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dn->phandle);
- dt_add_property_string(pd->dn, "ibm,loc-code", dev->nvlink.loc_code);
- if (dev->link_speed != 0xff)
- dt_add_property_cells(pd->dn, "ibm,nvlink-speed",
- lo32(dev->link_speed));
-
- if (!gpu)
- return;
-
- npu3_append_phandle(gpu->dn, "ibm,npu", pd->dn->phandle);
- dt_add_property_cells(pd->dn, "ibm,gpu", gpu->dn->phandle);
-}
-
-static int64_t npu3_gpu_bridge_sec_bus_reset(void *pdev,
- struct pci_cfg_reg_filter *pcrf __unused,
- uint32_t offset, uint32_t len,
- uint32_t *data, bool write)
-{
- struct pci_device *pd = pdev;
- struct pci_device *gpu;
- struct npu3 *npu;
- struct npu3_dev *dev;
- bool purge = false;
-
- if (!write)
- return OPAL_PARAMETER;
-
- if (len != 2 || offset & 1) {
- PCIERR(pd->phb, pd->bdfn,
- "Unsupported write to bridge control register\n");
- return OPAL_PARAMETER;
- }
-
- if (!(*data & PCI_CFG_BRCTL_SECONDARY_RESET))
- return OPAL_PARTIAL;
-
- gpu = list_top(&pd->children, struct pci_device, link);
- if (!gpu)
- return OPAL_PARTIAL;
-
- npu3_for_each_nvlink_npu(npu)
- npu3_for_each_nvlink_dev(dev, npu)
- if (dev->nvlink.gpu == gpu)
- if (!npu3_dev_reset(dev))
- purge = true;
-
- if (purge)
- purge_l2_l3_caches();
-
- return OPAL_PARTIAL;
-}
-
-static int npu3_dev_bind(struct phb *phb, struct pci_device *pd,
- void *data __unused)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- struct npu3_dev *dev = npu3_bdfn_to_dev(npu, pd->bdfn);
- struct pci_device *gpu;
-
- dev->nvlink.pd = pd;
-
- /* The slot label indicates which GPU this link is connected to */
- dev->nvlink.loc_code = dt_prop_get_def(dev->dn, "ibm,slot-label", NULL);
- if (!dev->nvlink.loc_code) {
- /**
- * @fwts-label NPUNoPHBSlotLabel
- * @fwts-advice No GPU/NPU slot information was found.
- * NVLink3 functionality will not work.
- */
- NPU3DEVERR(dev, "Cannot find GPU slot information\n");
- }
-
- npu3_dev_find_gpu(dev);
- npu3_dev_fixup_dt(dev);
-
- gpu = dev->nvlink.gpu;
- if (!gpu)
- return 0;
-
- /* When a GPU is reset, ensure all of its links are reset too */
- if (gpu->parent && gpu->parent->slot)
- pci_add_cfg_reg_filter(gpu->parent, PCI_CFG_BRCTL, 2,
- PCI_REG_FLAG_WRITE,
- npu3_gpu_bridge_sec_bus_reset);
-
- npu3_pvd_flag_set(dev, NPU3_DEV_PCI_LINKED);
-
- return 0;
-}
-
-struct npu3 *npu3_next_nvlink_npu(struct npu3 *npu, uint32_t chip_id)
-{
- uint64_t phb_id = 0;
- struct phb *phb;
-
- if (npu)
- phb_id = npu->nvlink.phb.opal_id + 1;
-
- for (; (phb = __pci_next_phb_idx(&phb_id));) {
- if (phb->phb_type != phb_type_npu_v3)
- continue;
-
- npu = npu3_phb_to_npu(phb);
- if (npu->chip_id == chip_id || chip_id == NPU3_ANY_CHIP)
- return npu;
- }
-
- return NULL;
-}
-
-static struct npu3 *npu3_last_npu(void)
-{
- static struct npu3 *last = NULL;
- struct npu3 *npu;
-
- if (last)
- return last;
-
- npu3_for_each_nvlink_npu(npu)
- last = npu;
-
- return last;
-}
-
-static uint32_t npu3_gpu_links(struct pci_device *gpu)
-{
- const struct dt_property *prop;
-
- if (!gpu)
- return 0;
-
- /* The link count is the number of phandles in "ibm,npu" */
- prop = dt_find_property(gpu->dn, "ibm,npu");
- if (!prop)
- return 0;
-
- return prop->len / sizeof(uint32_t);
-}
-
-static uint32_t npu3_links_per_gpu(void)
-{
- struct npu3 *npu;
- struct npu3_dev *dev;
- uint32_t links = 0;
-
- /* Use the first GPU we find to figure this out */
- npu3_for_each_nvlink_npu(npu) {
- npu3_for_each_nvlink_dev(dev, npu) {
- links = npu3_gpu_links(dev->nvlink.gpu);
- if (links)
- goto out;
- }
- }
-
-out:
- prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, links);
-
- return links;
-}
-
-int32_t npu3_dev_gpu_index(struct npu3_dev *dev)
-{
- const char *slot;
- char *p = NULL;
- int ret;
-
- slot = dev->nvlink.loc_code;
- if (!slot)
- return -1;
-
- if (memcmp(slot, "GPU", 3))
- return -1;
-
- ret = strtol(slot + 3, &p, 10);
- if (*p || p == slot + 3)
- return -1;
-
- return ret;
-}
-
-static uint32_t npu3_chip_possible_gpu_links(void)
-{
- struct proc_chip *chip;
- struct npu3 *npu;
- struct npu3_dev *dev;
- uint32_t possible = 0;
-
- for_each_chip(chip) {
- npu3_for_each_chip_nvlink_npu(npu, chip->id)
- npu3_for_each_nvlink_dev(dev, npu)
- if (npu3_dev_gpu_index(dev) != -1)
- possible++;
-
- if (possible)
- break;
- }
-
- prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, possible);
-
- return possible;
-}
-
-uint32_t npu3_chip_possible_gpus(void)
-{
- static uint32_t possible = -1;
- uint32_t links_per_gpu;
-
- /* Static value, same for all chips; only do this once */
- if (possible != -1)
- return possible;
-
- possible = 0;
-
- links_per_gpu = npu3_links_per_gpu();
- if (links_per_gpu)
- possible = npu3_chip_possible_gpu_links() / links_per_gpu;
-
- prlog(PR_DEBUG, "NPU: %s: %d\n", __func__, possible);
-
- return possible;
-}
-
-static void npu3_dev_assign_gmb(struct npu3_dev *dev, uint64_t addr,
- uint64_t size)
-{
- uint32_t mode;
- uint64_t val;
-
- switch (npu3_gpu_links(dev->nvlink.gpu)) {
- case 0:
- return;
- case 1:
- mode = 0;
- break;
- case 2:
- mode = 1;
- break;
- case 3:
- mode = 3;
- break;
- case 4:
- mode = 6;
- break;
- case 6:
- mode = 10;
- break;
- default:
- /* Hardware does not support this configuration */
- assert(0);
- }
-
- mode += PCI_FUNC(dev->nvlink.pvd->bdfn);
-
- val = NPU3_GPU_MEM_BAR_ENABLE |
- NPU3_GPU_MEM_BAR_POISON;
- val = SETFIELD(NPU3_GPU_MEM_BAR_ADDR, val, addr >> 30);
- val = SETFIELD(NPU3_GPU_MEM_BAR_SIZE, val, size >> 30);
- val = SETFIELD(NPU3_GPU_MEM_BAR_MODE, val, mode);
-
- npu3_write(dev->npu, NPU3_GPU_MEM_BAR(dev->index), val);
-}
-
-static struct dt_node *npu3_create_memory_dn(struct npu3_dev *dev,
- uint32_t gpu_index, uint64_t addr,
- uint64_t size)
-{
- uint32_t nid = 255 - gpu_index;
- struct dt_node *mem;
-
- mem = dt_find_by_name_addr(dt_root, "memory", addr);
- if (mem)
- return mem;
-
- mem = dt_new_addr(dt_root, "memory", addr);
- assert(mem);
-
- dt_add_property_string(mem, "device_type", "memory");
- dt_add_property_string(mem, "compatible", "ibm,coherent-device-memory");
- dt_add_property_u64s(mem, "reg", addr, size);
- dt_add_property_u64s(mem, "linux,usable-memory", addr, 0);
- dt_add_property_cells(mem, "ibm,chip-id", nid);
- dt_add_property_cells(mem, "ibm,associativity", 4, nid, nid, nid, nid);
-
- NPU3INF(dev->npu, "%s mem: 0x%016llx (nid %d)\n", dev->nvlink.loc_code,
- addr, nid);
-
- return mem;
-}
-
-static void npu3_dev_init_gpu_mem(struct npu3_dev *dev)
-{
- struct pci_device *pd = dev->nvlink.pd;
- struct npu3 *npu = dev->npu;
- struct dt_node *mem;
- uint64_t addr, size, gta;
- uint32_t gpu_index;
-
- if (!dev->nvlink.gpu)
- return;
-
- gpu_index = npu3_dev_gpu_index(dev) % npu3_chip_possible_gpus();
- phys_map_get(npu->chip_id, GPU_MEM_4T_DOWN, gpu_index, &addr, &size);
-
- npu3_dev_assign_gmb(dev, addr, size);
- mem = npu3_create_memory_dn(dev, gpu_index, addr, size);
-
- /*
- * Coral mode address compression. This is documented in Figure 3.5 of
- * the NPU workbook; "P9->GPU RA Compression (Coral)".
- */
- gta = (addr >> 42 & 0x1) << 42;
- gta |= (addr >> 45 & 0x3) << 43;
- gta |= (addr >> 49 & 0x3) << 45;
- gta |= addr & ((1ul << 43) - 1);
-
- dt_add_property_cells(pd->dn, "memory-region", mem->phandle);
- dt_add_property_u64s(pd->dn, "ibm,device-tgt-addr", gta);
-}
-
-static void npu3_final_fixup(void)
-{
- struct npu3 *npu;
- struct npu3_dev *dev;
-
- npu3_for_each_nvlink_npu(npu)
- npu3_for_each_nvlink_dev(dev, npu)
- npu3_dev_init_gpu_mem(dev);
-}
-
-static void npu3_phb_final_fixup(struct phb *phb)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
-
- pci_walk_dev(phb, NULL, npu3_dev_bind, NULL);
-
- /*
- * After every npu's devices are bound, do gpu-related fixup. This
- * counts on npu3_last_npu() walking the phbs in the same order as
- * the PHB final fixup loop in __pci_init_slots().
- */
- if (npu == npu3_last_npu())
- npu3_final_fixup();
-}
-
-static int64_t npu3_set_pe(struct phb *phb,
- uint64_t pe_num,
- uint64_t bdfn,
- uint8_t bcompare,
- uint8_t dcompare,
- uint8_t fcompare,
- uint8_t action)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- struct npu3_dev *dev;
- uint64_t val;
-
- dev = npu3_bdfn_to_dev(npu, bdfn);
- if (!dev)
- return OPAL_PARAMETER;
-
- if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
- return OPAL_PARAMETER;
-
- if (pe_num >= NPU3_MAX_PE_NUM)
- return OPAL_PARAMETER;
-
- if (bcompare != OpalPciBusAll ||
- dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER ||
- fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER)
- return OPAL_UNSUPPORTED;
-
- if (!dev->nvlink.gpu)
- return OPAL_SUCCESS;
-
- val = NPU3_CTL_BDF2PE_CFG_ENABLE;
- val = SETFIELD(NPU3_CTL_BDF2PE_CFG_PE, val, pe_num);
- val = SETFIELD(NPU3_CTL_BDF2PE_CFG_BDF, val, dev->nvlink.gpu->bdfn);
- npu3_write(npu, NPU3_CTL_BDF2PE_CFG(pe_num), val);
-
- val = NPU3_MISC_BDF2PE_CFG_ENABLE;
- val = SETFIELD(NPU3_MISC_BDF2PE_CFG_PE, val, pe_num);
- val = SETFIELD(NPU3_MISC_BDF2PE_CFG_BDF, val, dev->nvlink.gpu->bdfn);
- npu3_write(npu, NPU3_MISC_BDF2PE_CFG(pe_num), val);
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_tce_kill_pages(struct npu3 *npu,
- uint64_t pe_num,
- uint32_t tce_size,
- uint64_t dma_addr,
- uint32_t npages)
-{
- uint32_t check_tce_size;
- uint64_t val;
-
- if (pe_num >= NPU3_MAX_PE_NUM)
- return OPAL_PARAMETER;
-
- npu3_ioda_sel(npu, NPU3_ATS_IODA_ADDR_TBL_TVT, pe_num);
- val = npu3_read(npu, NPU3_ATS_IODA_DATA);
-
- check_tce_size = 0x800 << GETFIELD(NPU3_ATS_IODA_TVT_PAGE_SIZE, val);
- if (check_tce_size != tce_size) {
- NPU3ERR(npu, "%s: Unexpected TCE size (got 0x%x, expected 0x%x)\n",
- __func__, tce_size, check_tce_size);
-
- return OPAL_PARAMETER;
- }
-
- val = NPU3_ATS_TCE_KILL_ONE;
- val = SETFIELD(NPU3_ATS_TCE_KILL_PE_NUMBER, val, pe_num);
-
- while (npages--) {
- val = SETFIELD(NPU3_ATS_TCE_KILL_ADDRESS, val, dma_addr >> 12);
- npu3_write(npu, NPU3_ATS_TCE_KILL, val);
-
- dma_addr += tce_size;
- }
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_tce_kill(struct phb *phb,
- uint32_t kill_type,
- uint64_t pe_num,
- uint32_t tce_size,
- uint64_t dma_addr,
- uint32_t npages)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
-
- sync();
-
- switch(kill_type) {
- case OPAL_PCI_TCE_KILL_PAGES:
- return npu3_tce_kill_pages(npu, pe_num, tce_size,
- dma_addr, npages);
- case OPAL_PCI_TCE_KILL_PE:
- /*
- * NPU doesn't support killing a PE so fall through
- * and do a kill all instead.
- */
- case OPAL_PCI_TCE_KILL_ALL:
- npu3_write(npu, NPU3_ATS_TCE_KILL, NPU3_ATS_TCE_KILL_ALL);
- return OPAL_SUCCESS;
- }
-
- return OPAL_PARAMETER;
-}
-
-static const struct phb_ops npu_ops = {
- .cfg_read8 = npu3_cfg_read8,
- .cfg_read16 = npu3_cfg_read16,
- .cfg_read32 = npu3_cfg_read32,
- .cfg_write8 = npu3_cfg_write8,
- .cfg_write16 = npu3_cfg_write16,
- .cfg_write32 = npu3_cfg_write32,
- .eeh_freeze_status = npu3_eeh_freeze_status,
- .ioda_reset = npu3_ioda_reset,
- .map_pe_dma_window = npu3_map_pe_dma_window,
- .map_pe_dma_window_real = npu3_map_pe_dma_window_real,
- .next_error = npu3_next_error,
- .phb_final_fixup = npu3_phb_final_fixup,
- .set_pe = npu3_set_pe,
- .tce_kill = npu3_tce_kill,
-};
-
-static int64_t npu3_reset(struct pci_slot *slot)
-{
- struct npu3 *npu = npu3_phb_to_npu(slot->phb);
- struct npu3_dev *dev;
- int64_t rc = OPAL_SUCCESS;
- bool purge = false;
-
- npu3_for_each_nvlink_dev(dev, npu) {
- rc = npu3_dev_reset(dev);
- if (rc)
- break;
-
- purge = true;
- }
-
- /* No devices reset; don't purge, just return */
- if (!purge)
- return rc;
-
- /* All devices reset */
- if (!rc)
- return purge_l2_l3_caches();
-
- /* Some devices successfully reset; purge, but still return error */
- purge_l2_l3_caches();
- return rc;
-}
-
-static int64_t npu3_freset(struct pci_slot *slot __unused)
-{
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_get_link_state(struct pci_slot *slot __unused,
- uint8_t *val)
-{
- *val = OPAL_SHPC_LINK_UP_x1;
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_get_power_state(struct pci_slot *slot __unused,
- uint8_t *val)
-{
- *val = PCI_SLOT_POWER_ON;
- return OPAL_SUCCESS;
-}
-
-static void npu3_create_phb_slot(struct npu3 *npu)
-{
- struct pci_slot *slot;
-
- slot = pci_slot_alloc(&npu->nvlink.phb, NULL);
- if (!slot)
- return;
-
- /* Elementary functions */
- slot->ops.creset = npu3_reset;
- slot->ops.freset = npu3_freset;
- slot->ops.hreset = npu3_reset;
- slot->ops.get_link_state = npu3_get_link_state;
- slot->ops.get_power_state = npu3_get_power_state;
-}
-
-static void npu3_create_phb(struct npu3 *npu)
-{
- struct phb *phb = &npu->nvlink.phb;
-
- phb->phb_type = phb_type_npu_v3;
- phb->ops = &npu_ops;
- phb->dt_node = dt_new_addr(dt_root, "pciex", npu->regs[0]);
- assert(phb->dt_node);
-
- list_head_init(&phb->virt_devices);
- pci_register_phb(phb, npu3_get_opal_id(npu->chip_id,
- npu3_get_phb_index(npu->index)));
- npu3_create_phb_slot(npu);
- npu3_ioda_reset(phb, true);
-}
-
-static void npu3_dev_init_hw(struct npu3_dev *dev)
-{
- struct npu3 *npu = dev->npu;
- uint64_t reg, val;
-
- reg = NPU3_RELAXED_CFG2(dev->index);
- val = npu3_read(npu, reg);
- val |= NPU3_RELAXED_CFG2_CMD_CL_DMA_W |
- NPU3_RELAXED_CFG2_CMD_CL_DMA_W_HP |
- NPU3_RELAXED_CFG2_CMD_CL_DMA_INJ |
- NPU3_RELAXED_CFG2_CMD_PR_DMA_INJ |
- NPU3_RELAXED_CFG2_CMD_DMA_PR_W |
- NPU3_RELAXED_CFG2_CMD_CL_RD_NC_F0 |
- NPU3_RELAXED_CFG2_SRC_RDENA(0);
- npu3_write(npu, reg, val);
-
- reg = NPU3_NTL_MISC_CFG2(dev->index);
- val = npu3_read(npu, reg);
- val |= NPU3_NTL_MISC_CFG2_BRICK_ENABLE |
- NPU3_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
- npu3_write(npu, reg, val);
-}
-
-static void npu3_init_hw(struct npu3 *npu)
-{
- struct npu3_dev *dev;
- uint64_t reg, val;
-
- reg = NPU3_XTS_CFG;
- val = npu3_read(npu, reg);
- val |= NPU3_XTS_CFG_MMIOSD | NPU3_XTS_CFG_TRY_ATR_RO;
- npu3_write(npu, reg, val);
-
- reg = NPU3_XTS_CFG2;
- val = npu3_read(npu, reg);
- val |= NPU3_XTS_CFG2_NO_FLUSH_ENA;
- npu3_write(npu, reg, val);
-
- reg = NPU3_RELAXED_SRC(0);
- val = NPU3_RELAXED_SRC_MASK_NPU;
- npu3_write(npu, reg, val);
-
- npu3_for_each_nvlink_dev(dev, npu)
- npu3_dev_init_hw(dev);
-}
-
-/* PCI command register (BAR enable/disable) */
-static int64_t npu3_cfg_cmd(void *pvd,
- struct pci_cfg_reg_filter *pcrf __unused,
- uint32_t offset, uint32_t size,
- uint32_t *data, bool write)
-{
- struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data;
-
- if (!write)
- return OPAL_PARTIAL;
-
- if (offset != PCI_CFG_CMD)
- return OPAL_PARAMETER;
-
- if (size != 1 && size != 2 && size != 4)
- return OPAL_PARAMETER;
-
- npu3_dev_enable_bars(dev, !!(*data & PCI_CFG_CMD_MEM_EN));
-
- return OPAL_PARTIAL;
-}
-
-static int64_t npu3_cfg_bar_write(struct npu3_bar *bar, uint64_t mask,
- uint32_t data)
-{
- if (data != 0xffffffff)
- return OPAL_HARDWARE;
-
- /* Return BAR size on next read */
- bar->trap |= mask;
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_cfg_bar_read(struct npu3_bar *bar, uint64_t mask,
- uint32_t *data)
-{
- if (!(bar->trap & mask))
- return OPAL_PARTIAL;
-
- *data = GETFIELD(mask, bar->size);
- bar->trap &= ~mask;
-
- return OPAL_SUCCESS;
-}
-
-/* PCI BAR registers (NTL/GENID) */
-static int64_t npu3_cfg_bar(void *pvd __unused,
- struct pci_cfg_reg_filter *pcrf,
- uint32_t offset, uint32_t size, uint32_t *data,
- bool write)
-{
- struct npu3_bar *bar = (struct npu3_bar *)pcrf->data;
- uint64_t mask;
-
- if (size != 4)
- return OPAL_PARAMETER;
-
- if (offset == pcrf->start)
- mask = 0xffffffff;
- else if (offset == pcrf->start + 4)
- mask = 0xffffffffull << 32;
- else
- return OPAL_PARAMETER;
-
- if (write)
- return npu3_cfg_bar_write(bar, mask, *data);
-
- return npu3_cfg_bar_read(bar, mask, data);
-}
-
-/* PCI control register */
-static int64_t npu3_cfg_devctl(void *pvd,
- struct pci_cfg_reg_filter *pcrf __unused,
- uint32_t offset, uint32_t size,
- uint32_t *data, bool write)
-{
- struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data;
-
- if (!write)
- return OPAL_HARDWARE;
-
- if (size != 2 || offset & 1) {
- NPU3DEVERR(dev, "Unsupported write to pcie control register\n");
- return OPAL_PARAMETER;
- }
-
- if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
- if (!npu3_dev_reset(dev))
- purge_l2_l3_caches();
-
- return OPAL_PARTIAL;
-}
-
-static uint32_t npu3_cfg_populate_pcie_cap(struct npu3_dev *dev, uint32_t start,
- uint32_t prev_cap)
-{
- struct pci_virt_device *pvd = dev->nvlink.pvd;
- uint32_t val;
-
- /* Add capability list */
- PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
- PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_EXP);
-
- /* 0x00 - ID/PCIE capability */
- val = PCI_CFG_CAP_ID_EXP;
- val |= 0x2 << 16 | PCIE_TYPE_ENDPOINT << 20;
- PCI_VIRT_CFG_INIT_RO(pvd, start, 4, val);
-
- /* 0x04 - Device capability */
- val = PCIE_MPSS_128 |
- PCIE_PHANTOM_NONE << 3 |
- PCIE_L0SL_MAX_NO_LIMIT << 6 |
- PCIE_L1L_MAX_NO_LIMIT << 9 |
- PCICAP_EXP_DEVCAP_FUNC_RESET;
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_DEVCAP, 4, val);
-
- pci_virt_add_filter(pvd, start + PCICAP_EXP_DEVCTL, 2,
- PCI_REG_FLAG_WRITE,
- npu3_cfg_devctl, NULL);
-
- /* 0x08 - Device control and status */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DEVCTL, 4, 0x00002810,
- 0xffff0000, 0x000f0000);
-
- /* 0x0c - Link capability */
- val = PCIE_LSPEED_VECBIT_2 | PCIE_LWIDTH_1X << 4;
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP, 4, val);
-
- /* 0x10 - Link control and status */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL, 4, 0x00130000,
- 0xfffff000, 0xc0000000);
-
- /* 0x14 - Slot capability */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCAP, 4, 0x00000000);
-
- /* 0x18 - Slot control and status */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCTL, 4, 0x00000000);
-
- /* 0x1c - Root control and capability */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RC, 4, 0x00000000,
- 0xffffffe0, 0x00000000);
-
- /* 0x20 - Root status */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RSTAT, 4, 0x00000000,
- 0xffffffff, 0x00010000);
-
- /* 0x24 - Device capability 2 */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCIECAP_EXP_DCAP2, 4, 0x00000000);
-
- /* 0x28 - Device Control and status 2 */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DCTL2, 4, 0x00070000,
- 0xffff0000, 0x00000000);
-
- /* 0x2c - Link capability 2 */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP2, 4, 0x00000007);
-
- /* 0x30 - Link control and status 2 */
- PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL2, 4, 0x00000003,
- 0xffff0000, 0x00200000);
-
- /* 0x34 - Slot capability 2 */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCAP2, 4, 0x00000000);
-
- /* 0x38 - Slot control and status 2 */
- PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCTL2, 4, 0x00000000);
-
- return start + PCICAP_EXP_SCTL2 + 8;
-}
-
-static int64_t npu3_dev_procedure_write(struct npu3_dev *dev, uint32_t offset,
- uint32_t data)
-{
- switch (offset) {
- case 0:
- NPU3DEVINF(dev, "Ignoring write to status register\n");
- break;
- case 4:
- npu3_dev_procedure_init(dev, data);
- break;
- default:
- return OPAL_PARAMETER;
- }
-
- return OPAL_SUCCESS;
-}
-
-static int64_t npu3_dev_procedure_read(struct npu3_dev *dev, uint32_t offset,
- uint32_t *data)
-{
- switch (offset) {
- case 0:
- *data = npu3_dev_procedure_status(dev);
- break;
- case 4:
- *data = dev->proc.number;
- break;
- default:
- *data = 0;
- return OPAL_PARAMETER;
- }
-
- return OPAL_SUCCESS;
-}
-
-/* Hardware procedure control/status registers */
-static int64_t npu3_dev_procedure(void *pvd, struct pci_cfg_reg_filter *pcrf,
- uint32_t offset, uint32_t size,
- uint32_t *data, bool write)
-{
- struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data;
-
- if (size != 4)
- return OPAL_PARAMETER;
-
- offset -= pcrf->start;
-
- if (write)
- return npu3_dev_procedure_write(dev, offset, *data);
-
- return npu3_dev_procedure_read(dev, offset, data);
-}
-
-/* PPE SRAM access is indirect via CSAR/CSDR */
-static void npu3_dev_ppe_sram_sel(struct npu3_dev *dev, uint32_t reg)
-{
- uint64_t val;
-
- val = SETFIELD(OB_PPE_CSAR_SRAM_ADDR, 0ull, reg);
- xscom_write(dev->npu->chip_id, OB_PPE_CSAR(dev->ob_chiplet), val);
-}
-
-static void npu3_dev_ppe_sram_write(struct npu3_dev *dev, uint32_t reg,
- uint64_t val)
-{
- npu3_dev_ppe_sram_sel(dev, reg);
- xscom_write(dev->npu->chip_id, OB_PPE_CSDR(dev->ob_chiplet), val);
-}
-
-static uint64_t npu3_dev_ppe_sram_read(struct npu3_dev *dev, uint32_t reg)
-{
- uint64_t val;
-
- npu3_dev_ppe_sram_sel(dev, reg);
- xscom_read(dev->npu->chip_id, OB_PPE_CSDR(dev->ob_chiplet), &val);
-
- return val;
-}
-
-/* Software-implemented autonomous link training (SALT) */
-static int64_t npu3_dev_salt(void *pvd, struct pci_cfg_reg_filter *pcrf,
- uint32_t offset, uint32_t size, uint32_t *data,
- bool write)
-{
- struct npu3_dev *dev = ((struct pci_virt_device *)pvd)->data;
- unsigned long timeout;
- uint32_t cmd_reg;
- uint64_t val;
-
- if (size != 4 || offset != pcrf->start)
- return OPAL_PARAMETER;
-
- /* The config register before this one holds CMD_REG */
- PCI_VIRT_CFG_NORMAL_RD(pvd, pcrf->start - 4, 4, &cmd_reg);
- if (cmd_reg == 0xffffffff)
- return OPAL_PARAMETER;
-
- /* Check for another command in progress */
- val = npu3_dev_ppe_sram_read(dev, OB_PPE_SALT_CMD);
- if (GETFIELD(OB_PPE_SALT_CMD_READY, val)) {
- NPU3DEVINF(dev, "SALT_CMD 0x%x: Not ready\n", cmd_reg);
- return OPAL_BUSY;
- }
-
- val = OB_PPE_SALT_CMD_READY;
- val = SETFIELD(OB_PPE_SALT_CMD_RW, val, write);
- val = SETFIELD(OB_PPE_SALT_CMD_LINKNUM, val, npu3_chip_dev_index(dev));
- val = SETFIELD(OB_PPE_SALT_CMD_REG, val, cmd_reg);
- if (write)
- val = SETFIELD(OB_PPE_SALT_CMD_DATA, val, *data);
-
- npu3_dev_ppe_sram_write(dev, OB_PPE_SALT_CMD, val);
-
- /* Wait for the go bit to clear */
- timeout = mftb() + msecs_to_tb(1000);
-
- while (GETFIELD(OB_PPE_SALT_CMD_READY, val)) {
- if (tb_compare(mftb(), timeout) == TB_AAFTERB) {
- NPU3DEVINF(dev, "SALT_CMD 0x%x: Timeout\n", cmd_reg);
- return OPAL_BUSY;
- }
-
- val = npu3_dev_ppe_sram_read(dev, OB_PPE_SALT_CMD);
- }
-
- if (GETFIELD(OB_PPE_SALT_CMD_ERR, val))
- NPU3DEVINF(dev, "SALT_CMD 0x%x: Error\n", cmd_reg);
-
- if (!write)
- *data = GETFIELD(OB_PPE_SALT_CMD_DATA, val);
-
- return OPAL_SUCCESS;
-}
-
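The SALT handler above follows a common firmware idiom: compose a command word, write it to the PPE mailbox, then poll until the hardware drops the ready bit or a deadline passes. Below is a minimal, self-contained sketch of that issue-then-poll pattern against a simulated mailbox; the ready-bit position, helper names and poll budget are illustrative assumptions, not interfaces from this file (the firmware uses a timebase deadline via msecs_to_tb/tb_compare instead of a retry count).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SALT_CMD_READY	(1ull << 63)	/* assumed bit position, illustration only */

/* Simulated mailbox: a real implementation goes through the CSAR/CSDR
 * indirection shown earlier. This fake "hardware" clears READY after 3 reads. */
static uint64_t salt_cmd_reg;
static int reads_until_done;

static void write_salt_cmd(uint64_t val)
{
	salt_cmd_reg = val;
	reads_until_done = 3;
}

static uint64_t read_salt_cmd(void)
{
	if (reads_until_done && --reads_until_done == 0)
		salt_cmd_reg &= ~SALT_CMD_READY;
	return salt_cmd_reg;
}

/* Issue a command word, then poll until READY clears or the budget runs out. */
static bool salt_issue_and_wait(uint64_t cmd, unsigned int max_polls)
{
	write_salt_cmd(cmd | SALT_CMD_READY);

	while (read_salt_cmd() & SALT_CMD_READY)
		if (!max_polls--)
			return false;	/* the firmware path reports OPAL_BUSY here */

	return true;
}

int main(void)
{
	printf("command %s\n",
	       salt_issue_and_wait(0x42, 1000) ? "completed" : "timed out");
	return 0;
}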
-#define VENDOR_CAP_LEN 0x1c
-#define VENDOR_CAP_VERSION 0x02
-
-static uint32_t npu3_cfg_populate_vendor_cap(struct npu3_dev *dev,
- uint32_t start, uint32_t prev_cap)
-{
- struct pci_virt_device *pvd = dev->nvlink.pvd;
-
- /* Capabilities list */
- PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
- PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR);
-
- /* Length and version */
- PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN);
- PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, VENDOR_CAP_VERSION);
-
- /*
- * Defaults when the trap can't handle the read/write (e.g. due to
- * reading/writing less than 4 bytes).
- */
- PCI_VIRT_CFG_INIT_RO(pvd, start + 4, 4, 0);
- PCI_VIRT_CFG_INIT_RO(pvd, start + 8, 4, 0);
-
- /* PHY procedure trap */
- pci_virt_add_filter(pvd, start + 4, 8,
- PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
- npu3_dev_procedure, NULL);
-
- /* Link index */
- PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, npu3_chip_dev_index(dev));
-
- /* SALT registers */
- PCI_VIRT_CFG_INIT(pvd, start + 0x10, 4, 0xffffffff, 0, 0);
- PCI_VIRT_CFG_INIT_RO(pvd, start + 0x14, 4, 0);
-
- pci_virt_add_filter(pvd, start + 0x14, 4,
- PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
- npu3_dev_salt, NULL);
-
- return start + VENDOR_CAP_LEN;
-}
-
-static void npu3_cfg_populate(struct npu3_dev *dev)
-{
- struct pci_virt_device *pvd = dev->nvlink.pvd;
- uint64_t addr;
- uint32_t pos;
-
- /* 0x00 - Vendor/Device ID */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014);
-
- /* 0x04 - Command/Status */
- PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8,
- 0xf9000000);
-
- pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE,
- npu3_cfg_cmd, NULL);
-
- /* 0x08 - Rev/Class/Cache */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800102);
-
- /* 0x0c - CLS/Latency Timer/Header/BIST */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);
-
- /* 0x10/14 - NTL BAR */
- addr = SETFIELD(0xf, dev->ntl_bar.addr,
- PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64);
- PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4, lo32(addr), 0xf, 0);
- PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, hi32(addr), 0, 0);
-
- pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8,
- PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
- npu3_cfg_bar, &dev->ntl_bar);
-
- /* 0x18/1c - GENID BAR */
- addr = SETFIELD(0xf, dev->genid_bar.addr,
- PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64);
- PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 4, lo32(addr), 0xf, 0);
- PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR3, 4, hi32(addr), 0, 0);
-
- pci_virt_add_filter(pvd, PCI_CFG_BAR2, 8,
- PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
- npu3_cfg_bar, &dev->genid_bar);
-
- /* 0x20/0x24 - BARs, disabled */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000);
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000);
-
- /* 0x28 - Cardbus CIS pointer */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000);
-
- /* 0x2c - Subsystem ID */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000);
-
- /* 0x30 - ROM BAR, zero sized */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff);
-
- /* 0x34 - PCI Capability */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000);
-
- /* 0x38 - Reserved */
- PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);
-
- /* 0x3c - INT line/pin/Minimal grant/Maximal latency */
- PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); /* INT A */
-
- /* PCIE and vendor specific capability */
- pos = npu3_cfg_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP);
- pos = npu3_cfg_populate_vendor_cap(dev, pos, 0x41);
- PCI_VIRT_CFG_INIT_RO(pvd, pos + 1, 1, 0);
-}
-
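npu3_cfg_populate() above advertises the NTL and GENID windows as 64-bit memory BARs: the BAR type flags are folded into the low nibble of the address, the low 32 bits land in BAR0/BAR2 and the high 32 bits in BAR1/BAR3. A small stand-alone sketch of that encoding follows; the flag constants and the example address are assumptions for illustration, not the skiboot macros.

#include <stdint.h>
#include <stdio.h>

/* Standard PCI BAR low bits: memory space, 64-bit type (illustration only). */
#define BAR_TYPE_MEM	0x0
#define BAR_MEM64	0x4

static uint32_t lo32(uint64_t v) { return v & 0xffffffffu; }
static uint32_t hi32(uint64_t v) { return v >> 32; }

int main(void)
{
	uint64_t ntl_addr = 0x6030200000000ull;	/* made-up BAR base address */
	uint64_t bar = (ntl_addr & ~0xfull) | BAR_TYPE_MEM | BAR_MEM64;

	/* A 64-bit memory BAR spans two consecutive 32-bit config registers. */
	printf("BAR0 = 0x%08x, BAR1 = 0x%08x\n",
	       (unsigned)lo32(bar), (unsigned)hi32(bar));
	return 0;
}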
-static void npu3_dev_create_pvd(struct npu3_dev *dev)
-{
- struct npu3 *npu = dev->npu;
- struct phb *phb = &npu->nvlink.phb;
-
- dev->nvlink.pvd = pci_virt_add_device(phb, dev->index, 0x100, dev);
- if (!dev->nvlink.pvd)
- return;
-
- phb->scan_map |= 0x1 << GETFIELD(0xf8, dev->nvlink.pvd->bdfn);
- npu3_cfg_populate(dev);
-}
-
-static void npu3_dt_add_mmio_atsd(struct npu3 *npu)
-{
- struct dt_node *dn = npu->nvlink.phb.dt_node;
- uint64_t mmio_atsd[NPU3_XTS_ATSD_MAX];
-
- for (uint32_t i = 0; i < NPU3_XTS_ATSD_MAX; i++)
- mmio_atsd[i] = npu->regs[0] + NPU3_XTS_ATSD_LAUNCH(i);
-
- dt_add_property(dn, "ibm,mmio-atsd", mmio_atsd, sizeof(mmio_atsd));
-}
-
-static void npu3_dt_add_mmio_window(struct npu3 *npu)
-{
- struct dt_node *dn = npu->nvlink.phb.dt_node;
- uint32_t ntl0_index = npu->index * NPU3_LINKS_PER_NPU;
- uint64_t addr, size, win[2];
-
- /* Device MMIO window (NTL/GENID regs only) */
- phys_map_get(npu->chip_id, NPU_NTL, ntl0_index, &win[0], NULL);
- phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, &size);
- win[1] = addr + size - win[0];
-
- dt_add_property(dn, "ibm,mmio-window", win, sizeof(win));
- dt_add_property_cells(dn, "ranges", 0x02000000,
- hi32(win[0]), lo32(win[0]),
- hi32(win[0]), lo32(win[0]),
- hi32(win[1]), lo32(win[1]));
-}
-
-/* NDL No-Stall Event level */
-static uint32_t npu3_dev_interrupt_level(struct npu3_dev *dev)
-{
- const uint32_t level[12] = { 1, 3, 5, 7, 9, 11,
- 43, 45, 47, 49, 51, 53 };
-
- return level[npu3_chip_dev_index(dev)];
-}
-
-static void npu3_dt_add_interrupts(struct npu3 *npu)
-{
- struct dt_node *dn = npu->nvlink.phb.dt_node;
- uint32_t *map, icsp, i = 0;
- struct npu3_dev *dev;
- size_t map_size = 0;
-
- npu3_for_each_nvlink_dev(dev, npu)
- map_size += sizeof(*map) * 7;
-
- if (!map_size)
- return;
-
- icsp = get_ics_phandle();
- map = zalloc(map_size);
- assert(map);
-
- npu3_for_each_nvlink_dev(dev, npu) {
- map[i] = dev->nvlink.pvd->bdfn << 8;
- map[i + 3] = 1; /* INT A */
- map[i + 4] = icsp; /* interrupt-parent */
- map[i + 5] = npu->irq_base + npu3_dev_interrupt_level(dev);
- map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL */
- i += 7;
- }
-
- dt_add_property_cells(dn, "interrupt-parent", icsp);
- dt_add_property(dn, "interrupt-map", map, map_size);
- dt_add_property_cells(dn, "interrupt-map-mask", 0xff00, 0x0, 0x0, 0x7);
-
- free(map);
-}
-
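Each NVLink device contributes seven cells to the "interrupt-map" property built above: a three-cell child unit address (only the first cell, bdfn << 8, is non-zero), a one-cell child interrupt specifier (INT A), the interrupt-parent phandle, and a two-cell parent specifier (source number and trigger type). The struct below is purely an illustrative view of one entry; the field names are descriptive guesses, and the deleted code uses a flat uint32_t array.

#include <stdint.h>

/* One seven-cell "interrupt-map" entry, as written by the loop above. */
struct npu3_intmap_entry {
	uint32_t child_addr_hi;		/* pvd->bdfn << 8 (PCI phys.hi of the child) */
	uint32_t child_addr_mid;	/* 0 */
	uint32_t child_addr_lo;		/* 0 */
	uint32_t child_int;		/* 1 = INT A */
	uint32_t parent_phandle;	/* get_ics_phandle() */
	uint32_t parent_int;		/* irq_base + no-stall level for the brick */
	uint32_t parent_trigger;	/* 0 = edge, 1 = level */
};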
-/* Populate PCI root device node */
-static void npu3_dt_add_props(struct npu3 *npu)
-{
- struct dt_node *dn = npu->nvlink.phb.dt_node;
-
- dt_add_property_cells(dn, "#address-cells", 3);
- dt_add_property_cells(dn, "#size-cells", 2);
- dt_add_property_cells(dn, "#interrupt-cells", 1);
- dt_add_property_cells(dn, "bus-range", 0, 0xff);
- dt_add_property_cells(dn, "clock-frequency", 0x200, 0);
-
- dt_add_property_strings(dn, "device_type", "pciex");
-
- /*
- * To the OS, npu2 and npu3 are both ibm,ioda2-npu2-phb. The added
- * ibm,ioda2-npu3-phb allows for possible quirks.
- */
- dt_add_property_strings(dn, "compatible",
- "ibm,power9-npu-pciex",
- "ibm,ioda2-npu2-phb",
- "ibm,ioda2-npu3-phb");
-
- dt_add_property_cells(dn, "ibm,phb-index",
- npu3_get_phb_index(npu->index));
- dt_add_property_cells(dn, "ibm,phb-diag-data-size", 0);
- dt_add_property_cells(dn, "ibm,opal-num-pes", NPU3_MAX_PE_NUM);
- dt_add_property_cells(dn, "ibm,opal-reserved-pe", NPU3_RESERVED_PE_NUM);
- dt_add_property_cells(dn, "ibm,supported-tce-sizes",
- 12, /* 4K */
- 16, /* 64K */
- 24, /* 16M */
- 28); /* 256M */
-
- dt_add_property_cells(dn, "ibm,chip-id", npu->chip_id);
- dt_add_property_cells(dn, "ibm,npu-index", npu->index);
- dt_add_property_cells(dn, "ibm,npcq", npu->dt_node->phandle);
- dt_add_property_cells(dn, "ibm,xscom-base", npu->xscom_base);
- dt_add_property_cells(dn, "ibm,links", NPU3_LINKS_PER_NPU);
-
- dt_add_property(dn, "reg", npu->regs, sizeof(npu->regs));
-
- npu3_dt_add_mmio_atsd(npu);
- npu3_dt_add_mmio_window(npu);
- npu3_dt_add_interrupts(npu);
-}
-
-void npu3_init_nvlink(struct npu3 *npu)
-{
- struct npu3_dev *dev;
-
- if (!npu3_next_dev(npu, NULL, NPU3_DEV_TYPE_NVLINK))
- return;
-
- npu3_init_hw(npu);
- npu3_create_phb(npu);
-
- npu3_for_each_nvlink_dev(dev, npu)
- npu3_dev_create_pvd(dev);
-
- npu3_dt_add_props(npu);
-
- /* TODO: Sort out if/why we still can't enable this */
- disable_fast_reboot("NVLink device enabled");
-}
-
-static int64_t npu3_init_context_pid(struct npu3 *npu, uint32_t index,
- uint64_t msr)
-{
- uint64_t map, old_map;
-
- /* Unfiltered XTS mode; index is lparshort */
- map = SETFIELD(NPU3_XTS_PID_MAP_LPARSHORT, 0ull, index);
-
- /* Enable this mapping for both real and virtual addresses */
- map |= NPU3_XTS_PID_MAP_VALID_ATRGPA0 | NPU3_XTS_PID_MAP_VALID_ATRGPA1;
-
- /* Enable TLBIE/MMIOSD forwarding for this entry */
- map |= NPU3_XTS_PID_MAP_VALID_ATSD;
-
- /* Set the relevant MSR bits */
- if (msr & MSR_DR)
- map |= NPU3_XTS_PID_MAP_MSR_DR;
-
- if (msr & MSR_HV)
- map |= NPU3_XTS_PID_MAP_MSR_HV;
-
- if (msr & MSR_PR)
- map |= NPU3_XTS_PID_MAP_MSR_PR;
-
- /* We don't support anything other than 64-bit so hardcode it here */
- map |= NPU3_XTS_PID_MAP_MSR_SF;
-
- old_map = npu3_read(npu, NPU3_XTS_PID_MAP(index));
-
- /* Error out if this entry is already set with different msr bits */
- if (old_map && GETFIELD(NPU3_XTS_PID_MAP_MSR, old_map) !=
- GETFIELD(NPU3_XTS_PID_MAP_MSR, map)) {
- NPU3ERR(npu, "%s: Unexpected MSR value\n", __func__);
- return OPAL_PARAMETER;
- }
-
- if (!old_map) {
- NPU3DBG(npu, "XTS_PID_MAP[%03d] = 0x%08llx\n", index, map);
- npu3_write(npu, NPU3_XTS_PID_MAP(index), map);
- }
-
- npu->nvlink.ctx_ref[index]++;
-
- return OPAL_SUCCESS;
-}
-
-#define NPU3_VALID_ATS_MSR_BITS (MSR_DR | MSR_HV | MSR_PR | MSR_SF)
-
-/*
- * Allocate a context ID and initialize the tables with the relevant
- * information. Returns the ID or error if one couldn't be allocated.
- */
-int64_t npu3_init_context(struct phb *phb, uint64_t msr, uint64_t bdf)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint32_t lparshort, i;
- uint64_t map;
- int64_t rc;
-
- /*
- * MSR bits should be masked by the caller to allow for future
- * expansion if required.
- */
- if (msr & ~NPU3_VALID_ATS_MSR_BITS)
- return OPAL_UNSUPPORTED;
-
- lock(&npu->lock);
-
- for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
- map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
-
- if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
- break;
- }
-
- if (i == NPU3_XTS_BDF_MAP_MAX) {
- NPU3ERR(npu, "LPARID not associated with any GPU\n");
- rc = OPAL_PARAMETER;
- goto out;
- }
-
- lparshort = GETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map);
- NPU3DBG(npu, "Found LPARSHORT 0x%x for bdf %02llx:%02llx.%llx\n",
- lparshort, PCI_BUS_NUM(bdf), PCI_DEV(bdf), PCI_FUNC(bdf));
-
- rc = npu3_init_context_pid(npu, lparshort, msr);
- if (rc)
- goto out;
-
- if (!(map & NPU3_XTS_BDF_MAP_VALID)) {
- map |= NPU3_XTS_BDF_MAP_VALID;
- npu3_write(npu, NPU3_XTS_BDF_MAP(i), map);
- }
-
- rc = lparshort;
-
-out:
- unlock(&npu->lock);
- return rc;
-}
-
-static int64_t npu3_destroy_context_pid(struct npu3 *npu, uint32_t index)
-{
- if (!npu->nvlink.ctx_ref[index])
- return OPAL_PARAMETER;
-
- /* Only destroy when refcount hits 0 */
- if (--npu->nvlink.ctx_ref[index])
- return OPAL_PARTIAL;
-
- NPU3DBG(npu, "XTS_PID_MAP[%03d] = 0 (destroy)\n", index);
- npu3_write(npu, NPU3_XTS_PID_MAP(index), 0ull);
-
- return OPAL_SUCCESS;
-}
-
-int64_t npu3_destroy_context(struct phb *phb, uint64_t bdf)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- uint32_t lparshort, i;
- int64_t map, rc;
-
- lock(&npu->lock);
-
- for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
- map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
-
- if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
- break;
- }
-
- if (i == NPU3_XTS_BDF_MAP_MAX) {
- NPU3ERR(npu, "LPARID not associated with any GPU\n");
- rc = OPAL_PARAMETER;
- goto out;
- }
-
- lparshort = GETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map);
- rc = npu3_destroy_context_pid(npu, lparshort);
-
-out:
- unlock(&npu->lock);
- return rc;
-}
-
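npu3_init_context() and npu3_destroy_context() manage XTS_PID_MAP entries with a per-index reference count: the first init programs the entry, later inits only bump ctx_ref, and the entry is cleared only when the final destroy drops the count to zero (OPAL_PARTIAL signals that it is still in use). A minimal sketch of that refcounting discipline, detached from the hardware registers, is below; the table size, names and return values are stand-ins.

#include <stdint.h>
#include <stdio.h>

#define MAX_CTX	16

static uint64_t pid_map[MAX_CTX];	/* stand-in for the XTS_PID_MAP registers */
static unsigned int ctx_ref[MAX_CTX];

/* First user programs the entry; later users only take a reference. */
static int ctx_get(unsigned int idx, uint64_t map)
{
	if (idx >= MAX_CTX)
		return -1;
	if (!ctx_ref[idx])
		pid_map[idx] = map;
	ctx_ref[idx]++;
	return 0;
}

/* The entry is cleared only when the last reference goes away. */
static int ctx_put(unsigned int idx)
{
	if (idx >= MAX_CTX || !ctx_ref[idx])
		return -1;	/* nothing to destroy: OPAL_PARAMETER in the firmware */
	if (--ctx_ref[idx])
		return 1;	/* still referenced: OPAL_PARTIAL in the firmware */
	pid_map[idx] = 0;
	return 0;
}

int main(void)
{
	ctx_get(3, 0xabcd);
	ctx_get(3, 0xabcd);
	printf("%d %d\n", ctx_put(3), ctx_put(3));	/* prints "1 0" */
	return 0;
}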
-/* Map the given virtual bdf to lparid with given lpcr */
-int64_t npu3_map_lpar(struct phb *phb, uint64_t bdf, uint64_t lparid,
- uint64_t lpcr)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- struct npu3_dev *dev;
- int64_t rc = OPAL_SUCCESS;
- uint64_t map, val;
- uint32_t i;
-
- /*
- * The LPCR bits are only required for hash based ATS, which we don't
- * currently support, but may need to in the future.
- */
- if (lpcr)
- return OPAL_UNSUPPORTED;
-
- lock(&npu->lock);
-
- /* Update the entry if it already exists */
- for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++) {
- map = npu3_read(npu, NPU3_XTS_BDF_MAP(i));
-
- if (map && GETFIELD(NPU3_XTS_BDF_MAP_BDF, map) == bdf)
- break;
- }
-
- if (i == NPU3_XTS_BDF_MAP_MAX) {
- /* No existing mapping found, find space for a new one */
- for (i = 0; i < NPU3_XTS_BDF_MAP_MAX; i++)
- if (!npu3_read(npu, NPU3_XTS_BDF_MAP(i)))
- break;
- }
-
- if (i == NPU3_XTS_BDF_MAP_MAX) {
- NPU3ERR(npu, "No free XTS_BDF[] entry\n");
- rc = OPAL_RESOURCE;
- goto out;
- }
-
- map = NPU3_XTS_BDF_MAP_UNFILT;
- map = SETFIELD(NPU3_XTS_BDF_MAP_BDF, map, bdf);
- map = SETFIELD(NPU3_XTS_BDF_MAP_LPARID, map, lparid);
- map = SETFIELD(NPU3_XTS_BDF_MAP_LPARSHORT, map, i);
-
- /* We only support radix at the moment */
- map = SETFIELD(NPU3_XTS_BDF_MAP_XLAT, map, 0x3);
-
- /* Find a link on which to send ATSDs for this device */
- npu3_for_each_nvlink_dev(dev, npu)
- if (dev->nvlink.gpu->bdfn == bdf)
- break;
-
- if (!dev || dev->nvlink.gpu->bdfn != bdf) {
- NPU3ERR(npu, "Can't find a link for bdf %02llx:%02llx.%llx\n",
- PCI_BUS_NUM(bdf), PCI_DEV(bdf), PCI_FUNC(bdf));
- rc = OPAL_PARAMETER;
- goto out;
- }
-
- map = SETFIELD(NPU3_XTS_BDF_MAP_BRICK, map, dev->index);
-
- NPU3DBG(npu, "XTS_BDF_MAP[%03d] = 0x%08llx\n", i, map);
- npu3_write(npu, NPU3_XTS_BDF_MAP(i), map);
-
- /* We need to allocate an ATSD per link */
- val = SETFIELD(NPU3_XTS_ATSD_HYP_LPARID, 0ull, lparid);
- if (!lparid)
- val |= NPU3_XTS_ATSD_HYP_MSR_HV;
-
- npu3_write(npu, NPU3_XTS_ATSD_HYP(dev->index), val);
-
-out:
- unlock(&npu->lock);
- return rc;
-}
-
-static int64_t npu3_relaxed_order_enable(struct npu3 *npu, uint64_t src)
-{
- struct npu3_dev *dev;
- uint32_t i;
-
- for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++)
- if (npu3_read(npu, NPU3_RELAXED_SRC(i)) == src)
- return OPAL_SUCCESS; /* Already enabled */
-
- /* Find somewhere to write this source */
- for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++)
- if (!npu3_read(npu, NPU3_RELAXED_SRC(i)))
- break;
-
- if (i == NPU3_RELAXED_SRC_MAX) {
- NPU3ERR(npu, "Insufficient resources to activate relaxed ordering mode\n");
- return OPAL_RESOURCE;
- }
-
- npu3_write(npu, NPU3_RELAXED_SRC(i), src);
-
- npu3_for_each_nvlink_dev(dev, npu) {
- uint64_t val = npu3_read(npu, NPU3_RELAXED_CFG2(dev->index));
-
- val |= NPU3_RELAXED_CFG2_SRC_WRENA(i) |
- NPU3_RELAXED_CFG2_SRC_RDENA(i);
- npu3_write(npu, NPU3_RELAXED_CFG2(dev->index), val);
- }
-
- return OPAL_SUCCESS;
-}
-
-static void npu3_relaxed_order_disable(struct npu3 *npu, uint64_t src)
-{
- struct npu3_dev *dev;
- uint32_t i;
-
- for (i = 0; i < NPU3_RELAXED_SRC_MAX; i++)
- if (npu3_read(npu, NPU3_RELAXED_SRC(i)) == src)
- break;
-
- if (i == NPU3_RELAXED_SRC_MAX)
- return; /* Already disabled */
-
- npu3_for_each_nvlink_dev(dev, npu) {
- uint64_t val = npu3_read(npu, NPU3_RELAXED_CFG2(dev->index));
-
- val &= ~NPU3_RELAXED_CFG2_SRC_WRENA(i);
- val &= ~NPU3_RELAXED_CFG2_SRC_RDENA(i);
- npu3_write(npu, NPU3_RELAXED_CFG2(dev->index), val);
- }
-
- npu3_write(npu, NPU3_RELAXED_SRC(i), 0ull);
-}
-
-/* Enable or disable relaxed ordering on all nvlinks for a given PEC. */
-int64_t npu3_set_relaxed_order(struct phb *phb, uint32_t gcid, int pec,
- bool enable)
-{
- struct npu3 *npu = npu3_phb_to_npu(phb);
- int64_t rc = OPAL_SUCCESS;
- uint64_t src;
-
- NPU3INF(npu, "%s relaxed ordering for PEC %d on chip %d\n",
- enable ? "Enabling" : "Disabling",
- pec, gcid);
-
- lock(&npu->lock);
-
- src = SETFIELD(NPU3_RELAXED_SRC_GRPCHP, 0ull, gcid);
- src = SETFIELD(NPU3_RELAXED_SRC_PEC, src, pec);
- src = SETFIELD(NPU3_RELAXED_SRC_RDSTART, src, 0);
- src = SETFIELD(NPU3_RELAXED_SRC_RDEND, src, 47);
- src = SETFIELD(NPU3_RELAXED_SRC_WRSTART, src, 0);
- src = SETFIELD(NPU3_RELAXED_SRC_WREND, src, 23);
-
- if (enable)
- rc = npu3_relaxed_order_enable(npu, src);
- else
- npu3_relaxed_order_disable(npu, src);
-
- unlock(&npu->lock);
- return rc;
-}
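npu3_relaxed_order_enable() above is a find-or-allocate scan over the NPU3_RELAXED_SRC slots: return early if the source is already programmed, otherwise claim the first free slot, and give up with OPAL_RESOURCE when the table is full. The sketch below shows the same pattern against an in-memory table, folding the two loops into a single pass; the slot count and names are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define NSLOTS 4	/* NPU3_RELAXED_SRC_MAX is the firmware equivalent */

static uint64_t slots[NSLOTS];

/* Returns the slot index holding src, allocating one if needed; -1 if full. */
static int slot_find_or_alloc(uint64_t src)
{
	int free_idx = -1;

	for (int i = 0; i < NSLOTS; i++) {
		if (slots[i] == src)
			return i;		/* already enabled */
		if (!slots[i] && free_idx < 0)
			free_idx = i;		/* remember the first empty slot */
	}

	if (free_idx < 0)
		return -1;			/* OPAL_RESOURCE in the firmware path */

	slots[free_idx] = src;
	return free_idx;
}

int main(void)
{
	printf("%d %d %d\n", slot_find_or_alloc(0x11),
	       slot_find_or_alloc(0x22), slot_find_or_alloc(0x11));	/* 0 1 0 */
	return 0;
}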
diff --git a/hw/npu3.c b/hw/npu3.c
deleted file mode 100644
index 0346137..0000000
--- a/hw/npu3.c
+++ /dev/null
@@ -1,549 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-/*
- * Copyright 2019 IBM Corp.
- */
-
-#include <io.h>
-#include <xscom.h>
-#include <npu3.h>
-#include <npu3-regs.h>
-#include <nvram.h>
-#include <interrupts.h>
-#include <xive.h>
-
-#define NPU3LOG(l, npu, fmt, a...) \
- prlog(l, "NPU[%d:%d]: " fmt, (npu)->chip_id, (npu)->index, ##a)
-#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a)
-#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a)
-#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a)
-
-#define NPU3DEVLOG(l, dev, fmt, a...) \
- prlog(l, "NPU[%d:%d:%d]: " fmt, \
- (dev)->npu->chip_id, \
- (dev)->npu->index, \
- (dev)->index, ##a)
-#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
-#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
-#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
-
-static void npu3_dt_create_link(struct dt_node *npu, uint32_t npu_index,
- uint32_t dev_index)
-{
- struct dt_node *link;
- uint32_t phy_lane_mask, ob_chiplet;
-
- link = dt_new_addr(npu, "link", dev_index);
-
- dt_add_property_string(link, "compatible", "ibm,npu-link");
- dt_add_property_cells(link, "reg", dev_index);
- dt_add_property_cells(link, "ibm,npu-link-index", dev_index);
-
- switch (npu_index) {
- case 0:
- /* fall through */
- case 2:
- ob_chiplet = npu_index ? 3 : 0;
-
- switch (dev_index) {
- case 0:
- phy_lane_mask = PPC_BITMASK32(0, 3);
- break;
- case 1:
- phy_lane_mask = PPC_BITMASK32(13, 16);
- break;
- case 2:
- phy_lane_mask = PPC_BITMASK32(7, 10);
- break;
- case 3:
- phy_lane_mask = PPC_BITMASK32(20, 23);
- break;
- }
-
- break;
- case 1:
- switch (dev_index) {
- case 0:
- ob_chiplet = 1;
- phy_lane_mask = PPC_BITMASK32(0, 3);
- break;
- case 1:
- ob_chiplet = 2;
- phy_lane_mask = PPC_BITMASK32(0, 3);
- break;
- case 2:
- ob_chiplet = 1;
- phy_lane_mask = PPC_BITMASK32(7, 10);
- break;
- case 3:
- ob_chiplet = 2;
- phy_lane_mask = PPC_BITMASK32(7, 10);
- break;
- }
-
- break;
- default:
- return;
- }
-
- dt_add_property_cells(link, "ibm,npu-phy", ob_chiplet);
- dt_add_property_cells(link, "ibm,npu-lane-mask", phy_lane_mask);
-}
-
-static void npu3_dt_create_npu(struct dt_node *xscom, uint32_t npu_index)
-{
- const uint32_t npu_base[] = { 0x5011000, 0x5011400, 0x3011c00 };
- struct dt_node *npu;
-
- npu = dt_new_addr(xscom, "npu", npu_base[npu_index]);
-
- dt_add_property_cells(npu, "#size-cells", 0);
- dt_add_property_cells(npu, "#address-cells", 1);
- dt_add_property_cells(npu, "reg", npu_base[npu_index], 0x2c);
- dt_add_property_string(npu, "compatible", "ibm,power9-npu3");
- dt_add_property_cells(npu, "ibm,npu-index", npu_index);
-
- for (uint32_t i = 0; i < NPU3_LINKS_PER_NPU; i++)
- npu3_dt_create_link(npu, npu_index, i);
-}
-
-/* This can be removed when/if we decide to use HDAT instead */
-static bool npu3_dt_create(void)
-{
- struct proc_chip *chip = next_chip(NULL);
- struct dt_node *xscom;
-
- /* npu3 chips only */
- if (proc_gen < proc_gen_p9 ||
- chip->type == PROC_CHIP_P9_NIMBUS ||
- chip->type == PROC_CHIP_P9_CUMULUS)
- return false;
-
- dt_for_each_compatible(dt_root, xscom, "ibm,xscom")
- for (uint32_t i = 0; i < 3; i++)
- npu3_dt_create_npu(xscom, i);
-
- return true;
-}
-
-static struct npu3 *npu3_create(struct dt_node *dn)
-{
- struct npu3 *npu;
- struct dt_node *link;
- struct npu3_dev *dev;
- char *path;
- uint32_t i;
-
- npu = zalloc(sizeof(*npu));
- assert(npu);
-
- init_lock(&npu->lock);
-
- npu->dt_node = dn;
- npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
- npu->xscom_base = dt_get_address(dn, 0, NULL);
-
- npu->chip_id = dt_get_chip_id(dn);
- assert(get_chip(npu->chip_id));
-
- dt_for_each_compatible(dn, link, "ibm,npu-link") {
- i = dt_prop_get_u32(link, "ibm,npu-link-index");
- assert(i < NPU3_LINKS_PER_NPU);
-
- dev = &npu->devices[i];
- dev->index = i;
- dev->npu = npu;
- dev->dn = link;
- dev->ob_chiplet = dt_prop_get_u32(link, "ibm,npu-phy");
- dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
- dev->proc.status = NPU3_PROC_COMPLETE;
- }
-
- path = dt_get_path(dn);
- NPU3INF(npu, "Found %s\n", path);
- NPU3INF(npu, "SCOM base: 0x%llx\n", npu->xscom_base);
- free(path);
-
- return npu;
-}
-
-struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev,
- enum npu3_dev_type type)
-{
- uint32_t i = 0;
-
- if (dev)
- i = dev->index + 1;
-
- for (; i < NPU3_LINKS_PER_NPU; i++) {
- dev = &npu->devices[i];
-
- if (dev->type == type || type == NPU3_DEV_TYPE_ANY)
- return dev;
- }
-
- return NULL;
-}
-
-static void npu3_device_detect_fixup(struct npu3_dev *dev)
-{
- struct dt_node *dn = dev->dn;
-
- if (dev->type == NPU3_DEV_TYPE_NVLINK) {
- dt_add_property_strings(dn, "ibm,npu-link-type", "nvlink");
- dev->link_speed = dt_prop_get_u32_def(
- dn, "nvidia,link-speed", 0xff);
- return;
- }
-
- NPU3DEVDBG(dev, "Link type unknown\n");
- dt_add_property_strings(dn, "ibm,npu-link-type", "unknown");
-}
-
-/*
- * We use the indirect method because it uses the same addresses as
- * the MMIO offsets (NPU RING)
- */
-static void npu3_scom_sel(struct npu3 *npu, uint64_t reg, uint64_t size)
-{
- uint64_t val;
-
- val = SETFIELD(NPU3_MISC_DA_ADDR, 0ull, reg);
- val = SETFIELD(NPU3_MISC_DA_LEN, val, size);
- xscom_write(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_ADDR,
- val);
-}
-
-static void npu3_scom_write(struct npu3 *npu, uint64_t reg, uint64_t size,
- uint64_t val)
-{
- npu3_scom_sel(npu, reg, size);
- xscom_write(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
- val);
-}
-
-static uint64_t npu3_scom_read(struct npu3 *npu, uint64_t reg, uint64_t size)
-{
- uint64_t val;
-
- npu3_scom_sel(npu, reg, size);
- xscom_read(npu->chip_id,
- npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
- &val);
-
- return val;
-}
-
-void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- out_be64(mmio + reg, val);
- else
- npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_8B, val);
-
- /* CQ_SM writes should be mirrored in all four blocks */
- if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
- return;
-
- for (uint32_t i = 1; i < 4; i++)
- npu3_write(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
- val);
-}
-
-uint64_t npu3_read(struct npu3 *npu, uint64_t reg)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- return in_be64(mmio + reg);
-
- return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_8B);
-}
-
-void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- out_be32(mmio + reg, val);
- else
- npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_4B,
- (uint64_t)val << 32);
-
- if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
- return;
-
- for (uint32_t i = 1; i < 4; i++)
- npu3_write_4b(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
- val);
-}
-
-uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg)
-{
- void *mmio = (void *)npu->regs[0];
-
- if (mmio)
- return in_be32(mmio + reg);
-
- return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_4B) >> 32;
-}
-
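When the global MMIO BAR has not been assigned yet, the accessors above (like the CSAR/CSDR helpers earlier in npu3-nvlink.c) fall back to indirect access: write the target register offset to a select register, then move data through a separate data register. The stand-alone sketch below reproduces that select-then-data idiom against a trivial simulated register file; all names here are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Simulated backing store: a real NPU decodes the selected address itself. */
static uint64_t regfile[256];
static uint64_t selected;

static void ind_addr_write(uint64_t reg)	{ selected = reg % 256; }
static void ind_data_write(uint64_t val)	{ regfile[selected] = val; }
static uint64_t ind_data_read(void)		{ return regfile[selected]; }

/* The indirect idiom: select the target register, then access the data port. */
static void npu_ind_write(uint64_t reg, uint64_t val)
{
	ind_addr_write(reg);
	ind_data_write(val);
}

static uint64_t npu_ind_read(uint64_t reg)
{
	ind_addr_write(reg);
	return ind_data_read();
}

int main(void)
{
	npu_ind_write(0x48, 0xdeadbeef);
	printf("0x%llx\n", (unsigned long long)npu_ind_read(0x48));
	return 0;
}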
-static void npu3_misc_config(struct npu3 *npu)
-{
- struct npu3_dev *dev;
- uint32_t typemap = 0;
- uint64_t reg, val;
-
- npu3_for_each_nvlink_dev(dev, npu)
- typemap |= 0x10 >> dev->index;
-
- reg = NPU3_MCP_MISC_CFG0;
- val = npu3_read(npu, reg);
- val |= NPU3_MCP_MISC_CFG0_ENABLE_PBUS;
- val &= ~NPU3_MCP_MISC_CFG0_ENABLE_SNARF_CPM;
- val = SETFIELD(NPU3_MCP_MISC_CFG0_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_MCP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_SNP_MISC_CFG0;
- val = npu3_read(npu, reg);
- val |= NPU3_SNP_MISC_CFG0_ENABLE_PBUS;
- val = SETFIELD(NPU3_SNP_MISC_CFG0_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_SNP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_CTL_MISC_CFG2;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_CTL_MISC_CFG2_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_CTL_MISC_CFG2_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-
- reg = NPU3_DAT_MISC_CFG1;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_DAT_MISC_CFG1_NVLINK_MODE, val, typemap);
- val = SETFIELD(NPU3_DAT_MISC_CFG1_OCAPI_MODE, val, ~typemap);
- npu3_write(npu, reg, val);
-}
-
-static void npu3_assign_bars(struct npu3 *npu)
-{
- struct npu3_dev *dev;
- uint64_t addr, size, val;
-
- /* Global MMIO bar (per npu) */
- phys_map_get(npu->chip_id, NPU_REGS, npu->index, &addr, &size);
- val = SETFIELD(NPU3_MMIO_BAR_ADDR, 0ull, addr >> 24);
- val |= NPU3_MMIO_BAR_ENABLE;
- npu3_write(npu, NPU3_MMIO_BAR, val);
-
- NPU3INF(npu, "MMIO base: 0x%016llx (%lldMB)\n", addr, size >> 20);
- npu->regs[0] = addr;
- npu->regs[1] = size;
-
- /* NTL bar (per device) */
- npu3_for_each_dev(dev, npu) {
- phys_map_get(npu->chip_id, NPU_NTL, npu3_chip_dev_index(dev),
- &addr, &size);
- val = SETFIELD(NPU3_NTL_BAR_ADDR, 0ull, addr >> 16);
- val = SETFIELD(NPU3_NTL_BAR_SIZE, val, ilog2(size >> 16));
- npu3_write(npu, NPU3_NTL_BAR(dev->index), val);
-
- dev->ntl_bar.addr = addr;
- dev->ntl_bar.size = size;
- }
-
- /* GENID bar (logically divided per device) */
- phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, NULL);
- val = SETFIELD(NPU3_GENID_BAR_ADDR, 0ull, addr >> 19);
- npu3_write(npu, NPU3_GENID_BAR, val);
-
- npu3_for_each_dev(dev, npu) {
- dev->genid_bar.addr = addr + (dev->index << 16);
- dev->genid_bar.size = 64 << 10;
- }
-}
-
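The GENID BAR programmed above is a single hardware window that the firmware slices into fixed 64 KiB regions, one per device, at addr + (index << 16). A quick arithmetic sketch with a made-up base address:

#include <stdint.h>
#include <stdio.h>

#define GENID_SLICE_SIZE	(64u << 10)	/* 64 KiB per device, as above */

int main(void)
{
	/* Hypothetical window base; the firmware gets this from phys_map_get(). */
	uint64_t genid_base = 0x6030200000000ull;

	for (uint32_t i = 0; i < 4; i++)	/* one slice per link/brick */
		printf("dev %u: 0x%llx + %u KiB\n", i,
		       (unsigned long long)(genid_base + ((uint64_t)i << 16)),
		       GENID_SLICE_SIZE >> 10);
	return 0;
}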
-void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable)
-{
- struct npu3 *npu = dev->npu;
- uint64_t reg, val;
-
- if (dev->ntl_bar.enable == enable) /* No state change */
- return;
-
- dev->ntl_bar.enable = enable;
- dev->genid_bar.enable = enable;
-
- reg = NPU3_NTL_BAR(dev->index);
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_NTL_BAR_ENABLE, val, enable);
- npu3_write(npu, reg, val);
-
- /*
- * Generation IDs are a single space in the hardware but we split them
- * per device. Only disable in hardware once every device has disabled it.
- */
- if (!enable)
- npu3_for_each_dev(dev, npu)
- if (dev->genid_bar.enable)
- return;
-
- reg = NPU3_GENID_BAR;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_GENID_BAR_ENABLE, val, enable);
- npu3_write(npu, reg, val);
-}
-
-static uint64_t npu3_ipi_attributes(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
-
- /* TCE interrupt is used to detect a frozen PE */
- if (level == 18)
- return IRQ_ATTR_TARGET_OPAL |
- IRQ_ATTR_TARGET_RARE |
- IRQ_ATTR_TYPE_MSI;
-
- return IRQ_ATTR_TARGET_LINUX;
-}
-
-static void npu3_ipi_interrupt(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
-
- if (level != 18) {
- NPU3ERR(npu, "Received unknown interrupt %d\n", level);
- return;
- }
-
- opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
-}
-
-#define NPU3_IRQ_LEVELS 60
-
-static char *npu3_ipi_name(struct irq_source *is, uint32_t isn)
-{
- struct npu3 *npu = is->data;
- uint32_t level = isn - npu->irq_base;
- static const char *names[NPU3_IRQ_LEVELS] = {
- [0] = "NDL 0 Stall Event (brick 0)",
- [1] = "NDL 0 No-Stall Event (brick 0)",
- [2] = "NDL 1 Stall Event (brick 1)",
- [3] = "NDL 1 No-Stall Event (brick 1)",
- [4] = "NDL 2 Stall Event (brick 2)",
- [5] = "NDL 2 No-Stall Event (brick 2)",
- [6] = "NDL 3 Stall Event (brick 3)",
- [7] = "NDL 3 No-Stall Event (brick 3)",
- [8] = "NDL 4 Stall Event (brick 4)",
- [9] = "NDL 4 No-Stall Event (brick 4)",
- [10] = "NDL 5 Stall Event (brick 5)",
- [11] = "NDL 5 No-Stall Event (brick 5)",
- [12] = "NTL 0 Event",
- [13] = "NTL 1 Event",
- [14] = "NTL 2 Event",
- [15] = "NTL 3 Event",
- [16] = "NTL 4 Event",
- [17] = "NTL 5 Event",
- [18] = "TCE Event",
- [19] = "ATS Event",
- [20] = "CQ Event",
- [21] = "MISC Event",
- [41] = "Memory Controller Event",
- [42] = "NDL 6 Stall Event (brick 6)",
- [43] = "NDL 6 No-Stall Event (brick 6)",
- [44] = "NDL 7 Stall Event (brick 7)",
- [45] = "NDL 7 No-Stall Event (brick 7)",
- [46] = "NDL 8 Stall Event (brick 8)",
- [47] = "NDL 8 No-Stall Event (brick 8)",
- [48] = "NDL 9 Stall Event (brick 9)",
- [49] = "NDL 9 No-Stall Event (brick 9)",
- [50] = "NDL 10 Stall Event (brick 10)",
- [51] = "NDL 10 No-Stall Event (brick 10)",
- [52] = "NDL 11 Stall Event (brick 11)",
- [53] = "NDL 11 No-Stall Event (brick 11)",
- [54] = "NTL 6 Event",
- [55] = "NTL 7 Event",
- [56] = "NTL 8 Event",
- [57] = "NTL 9 Event",
- [58] = "NTL 10 Event",
- [59] = "NTL 11 Event",
- };
-
- if (level >= NPU3_IRQ_LEVELS || !names[level])
- return strdup("Unknown");
-
- return strdup(names[level]);
-}
-
-static const struct irq_source_ops npu3_ipi_ops = {
- .attributes = npu3_ipi_attributes,
- .interrupt = npu3_ipi_interrupt,
- .name = npu3_ipi_name,
-};
-
-static void npu3_setup_irqs(struct npu3 *npu)
-{
- uint64_t reg, val;
- uint32_t base;
-
- base = xive_alloc_ipi_irqs(npu->chip_id, NPU3_IRQ_LEVELS, 64);
- if (base == XIVE_IRQ_ERROR) {
- NPU3ERR(npu, "Failed to allocate interrupt sources\n");
- return;
- }
-
- xive_register_ipi_source(base, NPU3_IRQ_LEVELS, npu, &npu3_ipi_ops);
-
- /* Set IPI configuration */
- reg = NPU3_MISC_CFG;
- val = npu3_read(npu, reg);
- val = SETFIELD(NPU3_MISC_CFG_IPI_PS, val, NPU3_MISC_CFG_IPI_PS_64K);
- val = SETFIELD(NPU3_MISC_CFG_IPI_OS, val, NPU3_MISC_CFG_IPI_OS_AIX);
- npu3_write(npu, reg, val);
-
- /* Set IRQ base */
- reg = NPU3_MISC_INT_BAR;
- val = SETFIELD(NPU3_MISC_INT_BAR_ADDR, 0ull,
- (uint64_t)xive_get_trigger_port(base) >> 12);
- npu3_write(npu, reg, val);
-
- npu->irq_base = base;
-}
-
-static void npu3_init(struct npu3 *npu)
-{
- struct npu3_dev *dev;
-
- platform.npu3_device_detect(npu);
- npu3_for_each_dev(dev, npu)
- npu3_device_detect_fixup(dev);
-
- npu3_misc_config(npu);
- npu3_assign_bars(npu);
- npu3_setup_irqs(npu);
- npu3_init_nvlink(npu);
-}
-
-void probe_npu3(void)
-{
- struct dt_node *dn;
- struct npu3 *npu;
-
- if (!npu3_dt_create())
- return;
-
- if (!platform.npu3_device_detect) {
- prlog(PR_INFO, "NPU: Platform does not support NPU\n");
- return;
- }
-
- dt_for_each_compatible(dt_root, dn, "ibm,power9-npu3") {
- npu = npu3_create(dn);
- npu3_init(npu);
- }
-}