From 8279e529d524bce56fca20f6ece0e6f92b0f7aba Mon Sep 17 00:00:00 2001
From: Reza Arbab
Date: Wed, 17 Jul 2019 15:44:22 -0500
Subject: npu2: Prepare purge_l2_l3_caches() for reuse

Move this to a separate compilation unit with its own header, for reuse.

The code formerly in npu2.c is copied verbatim.

The #defines formerly in npu2-regs.h have been reformatted and changed
to use PPC_BITMASK() instead of multiple consecutive PPC_BIT()s.

Signed-off-by: Reza Arbab
Reviewed-by: Andrew Donnellan
Reviewed-by: Alexey Kardashevskiy
Reviewed-by: Stewart Smith
Signed-off-by: Oliver O'Halloran
---
Reviewer notes (commentary only, not part of the commit message):

 * This copy of the patch appears to have lost its angle-bracketed
   text: every "#include" below is missing its header name, and the
   From:/trailer lines carry no e-mail addresses.  They are left as
   found; restore them from the original commit before applying.

 * Observations on the moved code.  The patch does not change it (the
   commit message says it is copied verbatim), so any fix presumably
   belongs in a separate follow-up:

   - wait_l2_purge(): when xscom_read() fails, the loop is left with
     "break", so execution continues to the "clear the trigger bit"
     write.  "val" may not have been set by the failed read, and the
     read's error code in "rc" is replaced by the result of
     xscom_write().
   - wait_l2_purge() / wait_l3_purge(): the "timed out %i" messages
     print "rc", which is always 0 at that point because a non-zero
     rc leaves the loop earlier.
   - purge_l2_l3_caches(): "rc" has no initialiser; if
     for_each_ungarded_cpu() visits no thread (TODO confirm whether
     that can happen) the function returns an indeterminate value.

 hw/Makefile.inc     |   2 +-
 hw/cache-p9.c       | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/npu2.c           | 139 +----------------------------------------
 include/cache-p9.h  |  22 +++++++
 include/npu2-regs.h |  13 ----
 5 files changed, 197 insertions(+), 152 deletions(-)
 create mode 100644 hw/cache-p9.c
 create mode 100644 include/cache-p9.h

diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index 2f2feb9..1ab1f40 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -7,7 +7,7 @@ HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
 HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
 HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
 HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o
-HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o
+HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o
 HW=hw/built-in.a
 
 include $(SRC)/hw/fsp/Makefile.inc
diff --git a/hw/cache-p9.c b/hw/cache-p9.c
new file mode 100644
index 0000000..ea6f00b
--- /dev/null
+++ b/hw/cache-p9.c
@@ -0,0 +1,173 @@
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Registers and bits used to clear the L2 and L3 cache */
+#define L2_PRD_PURGE_CMD_REG		0x1080e
+#define L2_PRD_PURGE_CMD_TRIGGER	PPC_BIT(0)
+#define L2_PRD_PURGE_CMD_TYPE_MASK	PPC_BITMASK(1, 4)
+#define L2CAC_FLUSH			0x0
+#define L2_PRD_PURGE_CMD_REG_BUSY	PPC_BIT(9)
+#define L3_PRD_PURGE_REG		0x1180e
+#define L3_PRD_PURGE_REQ		PPC_BIT(0)
+#define L3_PRD_PURGE_TTYPE_MASK		PPC_BITMASK(1, 4)
+#define L3_FULL_PURGE			0x0
+
+#define L2_L3_PRD_PURGE_TIMEOUT_MS	20
+
+static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
+{
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
+	int rc;
+
+	rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
+			      L2_PRD_PURGE_CMD_TYPE_MASK);
+	if (!rc)
+		rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
+				      L2_PRD_PURGE_CMD_TRIGGER);
+	if (rc)
+		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
+		      "failed %i\n", core_id, rc);
+	return rc;
+}
+
+static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
+{
+	uint64_t val;
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
+	unsigned long now = mftb();
+	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
+	int rc;
+
+	while (1) {
+		rc = xscom_read(chip_id, addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
+			      "failed %i\n", core_id, rc);
+			break;
+		}
+		if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
+			break;
+		now = mftb();
+		if (tb_compare(now, end) == TB_AAFTERB) {
+			prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
+			      core_id, rc);
+			return OPAL_BUSY;
+		}
+	}
+
+	/* We have to clear the trigger bit ourselves */
+	val &= ~L2_PRD_PURGE_CMD_TRIGGER;
+	rc = xscom_write(chip_id, addr, val);
+	if (rc)
+		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
+		      core_id, rc);
+	return rc;
+}
+
+static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
+{
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
+	int rc;
+
+	rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
+			      L3_PRD_PURGE_TTYPE_MASK);
+	if (!rc)
+		rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
+				      L3_PRD_PURGE_REQ);
+	if (rc)
+		prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
+		      "failed %i\n", core_id, rc);
+	return rc;
+}
+
+static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
+{
+	uint64_t val;
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
+	unsigned long now = mftb();
+	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
+	int rc;
+
+	/* Trigger bit is automatically set to zero when flushing is done */
+	while (1) {
+		rc = xscom_read(chip_id, addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
+			      "failed %i\n", core_id, rc);
+			break;
+		}
+		if (!(val & L3_PRD_PURGE_REQ))
+			break;
+		now = mftb();
+		if (tb_compare(now, end) == TB_AAFTERB) {
+			prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
+			      core_id, rc);
+			return OPAL_BUSY;
+		}
+	}
+	return rc;
+}
+
+int64_t purge_l2_l3_caches(void)
+{
+	struct cpu_thread *t;
+	uint64_t core_id, prev_core_id = (uint64_t)-1;
+	int rc;
+	unsigned long now = mftb();
+
+	for_each_ungarded_cpu(t) {
+		/* Only need to do it once per core chiplet */
+		core_id = pir_to_core_id(t->pir);
+		if (prev_core_id == core_id)
+			continue;
+		prev_core_id = core_id;
+		rc = start_l2_purge(t->chip_id, core_id);
+		if (rc)
+			goto trace_exit;
+		rc = start_l3_purge(t->chip_id, core_id);
+		if (rc)
+			goto trace_exit;
+	}
+
+	prev_core_id = (uint64_t)-1;
+	for_each_ungarded_cpu(t) {
+		/* Only need to do it once per core chiplet */
+		core_id = pir_to_core_id(t->pir);
+		if (prev_core_id == core_id)
+			continue;
+		prev_core_id = core_id;
+
+		rc = wait_l2_purge(t->chip_id, core_id);
+		if (rc)
+			goto trace_exit;
+		rc = wait_l3_purge(t->chip_id, core_id);
+		if (rc)
+			goto trace_exit;
+	}
+
+trace_exit:
+	prlog(PR_TRACE, "L2/L3 purging took %ldus\n",
+	      tb_to_usecs(mftb() - now));
+
+	return rc;
+}
diff --git a/hw/npu2.c b/hw/npu2.c
index c7ce443..a904ccb 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 #define VENDOR_CAP_START	0x80
 #define VENDOR_CAP_END		0x90
@@ -307,144 +308,6 @@ static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
 	return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
 }
 
-static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
-{
-	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
-	int rc;
-
-	rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
-			      L2_PRD_PURGE_CMD_TYPE_MASK);
-	if (!rc)
-		rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
-				      L2_PRD_PURGE_CMD_TRIGGER);
-	if (rc)
-		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
-		      "failed %i\n", core_id, rc);
-	return rc;
-}
-
-static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
-{
-	uint64_t val;
-	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
-	unsigned long now = mftb();
-	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
-	int rc;
-
-	while (1) {
-		rc = xscom_read(chip_id, addr, &val);
-		if (rc) {
-			prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
-			      "failed %i\n", core_id, rc);
-			break;
-		}
-		if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
-			break;
-		now = mftb();
-		if (tb_compare(now, end) == TB_AAFTERB) {
-			prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
-			      core_id, rc);
-			return OPAL_BUSY;
-		}
-	}
-
-	/* We have to clear the trigger bit ourselves */
-	val &= ~L2_PRD_PURGE_CMD_TRIGGER;
-	rc = xscom_write(chip_id, addr, val);
-	if (rc)
-		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
-		      core_id, rc);
-	return rc;
-}
-
-static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
-{
-	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
-	int rc;
-
-	rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
-			      L3_PRD_PURGE_TTYPE_MASK);
-	if (!rc)
-		rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
-				      L3_PRD_PURGE_REQ);
-	if (rc)
-		prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
-		      "failed %i\n", core_id, rc);
-	return rc;
-}
-
-static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
-{
-	uint64_t val;
-	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
-	unsigned long now = mftb();
-	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
-	int rc;
-
-	/* Trigger bit is automatically set to zero when flushing is done */
-	while (1) {
-		rc = xscom_read(chip_id, addr, &val);
-		if (rc) {
-			prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
-			      "failed %i\n", core_id, rc);
-			break;
-		}
-		if (!(val & L3_PRD_PURGE_REQ))
-			break;
-		now = mftb();
-		if (tb_compare(now, end) == TB_AAFTERB) {
-			prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
-			      core_id, rc);
-			return OPAL_BUSY;
-		}
-	}
-	return rc;
-}
-
-static int64_t purge_l2_l3_caches(void)
-{
-	struct cpu_thread *t;
-	uint64_t core_id, prev_core_id = (uint64_t)-1;
-	int rc;
-	unsigned long now = mftb();
-
-	for_each_ungarded_cpu(t) {
-		/* Only need to do it once per core chiplet */
-		core_id = pir_to_core_id(t->pir);
-		if (prev_core_id == core_id)
-			continue;
-		prev_core_id = core_id;
-		rc = start_l2_purge(t->chip_id, core_id);
-		if (rc)
-			goto trace_exit;
-		rc = start_l3_purge(t->chip_id, core_id);
-		if (rc)
-			goto trace_exit;
-	}
-
-	prev_core_id = (uint64_t)-1;
-	for_each_ungarded_cpu(t) {
-		/* Only need to do it once per core chiplet */
-		core_id = pir_to_core_id(t->pir);
-		if (prev_core_id == core_id)
-			continue;
-		prev_core_id = core_id;
-
-		rc = wait_l2_purge(t->chip_id, core_id);
-		if (rc)
-			goto trace_exit;
-		rc = wait_l3_purge(t->chip_id, core_id);
-		if (rc)
-			goto trace_exit;
-	}
-
-trace_exit:
-	prlog(PR_TRACE, "L2/L3 purging took %ldus\n",
-	      tb_to_usecs(mftb() - now));
-
-	return rc;
-}
-
 static int64_t npu2_dev_cfg_exp_devcap(void *dev,
 		struct pci_cfg_reg_filter *pcrf __unused,
 		uint32_t offset, uint32_t size,
diff --git a/include/cache-p9.h b/include/cache-p9.h
new file mode 100644
index 0000000..e763433
--- /dev/null
+++ b/include/cache-p9.h
@@ -0,0 +1,22 @@
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CACHE_P9_H
+#define __CACHE_P9_H
+
+int64_t purge_l2_l3_caches(void);
+
+#endif
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 847fc2c..ce9fdbb 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -774,19 +774,6 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define OB_ODL_ENDPOINT_INFO(brick_index)	\
 	(0x9010832 + OB_ODL_OFFSET(brick_index))
 
-/* Registers and bits used to clear the L2 and L3 cache */
-#define L2_PRD_PURGE_CMD_REG		0x1080E
-#define L2_PRD_PURGE_CMD_REG_BUSY	0x0040000000000000UL
-#define L2_PRD_PURGE_CMD_TYPE_MASK	PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4)
-#define L2_PRD_PURGE_CMD_TRIGGER	PPC_BIT(0)
-#define L2CAC_FLUSH			0x0
-#define L3_PRD_PURGE_REG		0x1180E
-#define L3_PRD_PURGE_REQ		PPC_BIT(0)
-#define L3_PRD_PURGE_TTYPE_MASK		PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4)
-#define L3_FULL_PURGE			0x0
-
-#define L2_L3_PRD_PURGE_TIMEOUT_MS	20
-
 /* Config registers for NPU2 */
 #define NPU_STCK0_CS_SM0_MISC_CONFIG0	0x5011000
 #define NPU_STCK0_CS_SM1_MISC_CONFIG0	0x5011030
-- 
cgit v1.1