diff options
-rw-r--r-- | hw/npu2.c | 139 | ||||
-rw-r--r-- | include/npu2-regs.h | 11 |
2 files changed, 149 insertions, 1 deletions
@@ -326,6 +326,138 @@ static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf, return npu2_cfg_read_bar(ndev, pcrf, offset, len, data); } +static int start_l2_purge(uint32_t chip_id, uint32_t core_id) +{ + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG); + int rc; + + rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH, + L2_PRD_PURGE_CMD_TYPE_MASK); + if (!rc) + rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER, + L2_PRD_PURGE_CMD_TRIGGER); + if (rc) + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask " + "failed %i\n", core_id, rc); + return rc; +} + +static int wait_l2_purge(uint32_t chip_id, uint32_t core_id) +{ + uint64_t val; + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG); + unsigned long now = mftb(); + unsigned long end = now + msecs_to_tb(2); + int rc; + + while (1) { + rc = xscom_read(chip_id, addr, &val); + if (rc) { + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read " + "failed %i\n", core_id, rc); + break; + } + if (!(val & L2_PRD_PURGE_CMD_REG_BUSY)) + break; + now = mftb(); + if (tb_compare(now, end) == TB_AAFTERB) { + prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n", + core_id, rc); + return OPAL_BUSY; + } + } + + /* We have to clear the trigger bit ourselves */ + val &= ~L2_PRD_PURGE_CMD_TRIGGER; + rc = xscom_write(chip_id, addr, val); + if (rc) + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n", + core_id, rc); + return rc; +} + +static int start_l3_purge(uint32_t chip_id, uint32_t core_id) +{ + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG); + int rc; + + rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE, + L3_PRD_PURGE_TTYPE_MASK); + if (!rc) + rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ, + L3_PRD_PURGE_REQ); + if (rc) + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask " + "failed %i\n", core_id, rc); + return rc; +} + +static int wait_l3_purge(uint32_t chip_id, uint32_t core_id) +{ + uint64_t val; + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG); + unsigned long now = mftb(); + unsigned long end = now + msecs_to_tb(2); + int rc; + + /* Trigger bit is automatically set to zero when flushing is done */ + while (1) { + rc = xscom_read(chip_id, addr, &val); + if (rc) { + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read " + "failed %i\n", core_id, rc); + break; + } + if (!(val & L3_PRD_PURGE_REQ)) + break; + now = mftb(); + if (tb_compare(now, end) == TB_AAFTERB) { + prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n", + core_id, rc); + return OPAL_BUSY; + } + } + return rc; +} + +static int64_t purge_l2_l3_caches(void) +{ + struct cpu_thread *t; + uint64_t core_id, prev_core_id = (uint64_t)-1; + int rc; + + for_each_ungarded_cpu(t) { + /* Only need to do it once per core chiplet */ + core_id = pir_to_core_id(t->pir); + if (prev_core_id == core_id) + continue; + prev_core_id = core_id; + rc = start_l2_purge(t->chip_id, core_id); + if (rc) + return rc; + rc = start_l3_purge(t->chip_id, core_id); + if (rc) + return rc; + } + + prev_core_id = (uint64_t)-1; + for_each_ungarded_cpu(t) { + /* Only need to do it once per core chiplet */ + core_id = pir_to_core_id(t->pir); + if (prev_core_id == core_id) + continue; + prev_core_id = core_id; + + rc = wait_l2_purge(t->chip_id, core_id); + if (rc) + return rc; + rc = wait_l3_purge(t->chip_id, core_id); + if (rc) + return rc; + } + return OPAL_SUCCESS; +} + static int64_t npu2_dev_cfg_exp_devcap(void *dev, struct pci_cfg_reg_filter *pcrf __unused, uint32_t offset, uint32_t size, @@ -333,6 +465,7 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev, { struct pci_virt_device *pvd = dev; struct npu2_dev *ndev = pvd->data; + int rc; assert(write); @@ -346,6 +479,10 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev, if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET) npu2_dev_procedure_reset(ndev); + rc = purge_l2_l3_caches(); + if (rc) + return rc; + return OPAL_PARTIAL; } @@ -1125,7 +1262,7 @@ static int64_t npu2_hreset(struct pci_slot *slot __unused) reset_ntl(ndev); } } - return OPAL_SUCCESS; + return purge_l2_l3_caches(); } static int64_t npu2_freset(struct pci_slot *slot __unused) diff --git a/include/npu2-regs.h b/include/npu2-regs.h index f6e349f..ae5e225 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -758,4 +758,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define OB3_ODL0_ENDPOINT_INFO 0xC010832 #define OB3_ODL1_ENDPOINT_INFO 0xC010833 +/* Registers and bits used to clear the L2 and L3 cache */ +#define L2_PRD_PURGE_CMD_REG 0x1080E +#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000 +#define L2_PRD_PURGE_CMD_TYPE_MASK PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4) +#define L2_PRD_PURGE_CMD_TRIGGER PPC_BIT(0) +#define L2CAC_FLUSH 0x0 +#define L3_PRD_PURGE_REG 0x1180E +#define L3_PRD_PURGE_REQ PPC_BIT(0) +#define L3_PRD_PURGE_TTYPE_MASK PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4) +#define L3_FULL_PURGE 0x0 + #endif /* __NPU2_REGS_H */ |