author     Stefan Hajnoczi <stefanha@redhat.com>    2025-03-12 07:50:24 +0800
committer  Stefan Hajnoczi <stefanha@redhat.com>    2025-03-13 10:29:04 +0800
commit     71569cd8aba31fcb3a326c56c307d2b811417c0b (patch)
tree       2a40c27ee7740f854812afae73c4080b23348b22 /hw/intc
parent     94d689d0c6f23dc3129e8432c496ccb866788dbf (diff)
parent     0f17ae24b53eaab4bbe9cfab267c536e2f7fdbd7 (diff)
Merge tag 'pull-ppc-for-10.0-1-20250311' of https://gitlab.com/npiggin/qemu into staging
* Next round of XIVE patches...
* tag 'pull-ppc-for-10.0-1-20250311' of https://gitlab.com/npiggin/qemu: (72 commits)
docs/system/ppc/amigang.rst: Update for NVRAM emulation
ppc/amigaone: Add #defines for memory map constants
ppc/amigaone: Add kernel and initrd support
ppc/amigaone: Add default environment
ppc/amigaone: Implement NVRAM emulation
ppc/amigaone: Simplify replacement dummy_fw
spapr: Generate random HASHPKEYR for spapr machines
target/ppc: Avoid warning message for zero process table entries
target/ppc: Wire up BookE ATB registers for e500 family
target/ppc: fix timebase register reset state
spapr: nested: Add support for reporting Hostwide state counter
ppc: spapr: Enable 2nd DAWR on Power10 pSeries machine
ppc: Enable 2nd DAWR support on Power10 PowerNV machine
hw/ppc/epapr: Do not swap ePAPR magic value
hw/ppc/spapr: Convert DIRTY_HPTE() macro as hpte_set_dirty() method
hw/ppc/spapr: Convert CLEAN_HPTE() macro as hpte_set_clean() method
hw/ppc/spapr: Convert HPTE_DIRTY() macro as hpte_is_dirty() method
hw/ppc/spapr: Convert HPTE_VALID() macro as hpte_is_valid() method
hw/ppc/spapr: Convert HPTE() macro as hpte_get_ptr() method
target/ppc: Restrict ATTN / SCV / PMINSN helpers to TCG
...
[Fix __packed macro redefinition on FreeBSD 14 hosts:
../hw/ppc/pnv_occ.c:397:9: error: '__packed' macro redefined [-Werror,-Wmacro-redefined]
397 | #define __packed QEMU_PACKED
| ^
/usr/include/sys/cdefs.h:217:9: note: previous definition is here
217 | #define __packed __attribute__((__packed__))
| ^
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
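For context on the bracketed note above: FreeBSD 14's <sys/cdefs.h> already defines __packed, so the unconditional "#define __packed QEMU_PACKED" in pnv_occ.c is a macro redefinition and fails under -Werror. The resolution that was merged is not reproduced on this page; the snippet below is only a minimal, self-contained sketch of the clash and one common way around it. QEMU_PACKED is re-declared here as a stand-in for QEMU's macro, and struct example_record is hypothetical, not taken from pnv_occ.c.

    /* sketch.c - illustrative only, not the change that was merged */
    #include <stdint.h>
    #include <sys/cdefs.h>          /* on FreeBSD 14 this defines __packed */

    /* stand-in for QEMU's QEMU_PACKED (normally from qemu/compiler.h) */
    #define QEMU_PACKED __attribute__((packed))

    /* Redefining __packed directly trips -Wmacro-redefined on FreeBSD,
     * because <sys/cdefs.h> already has:
     *     #define __packed __attribute__((__packed__))
     * Dropping the host definition first avoids the warning. */
    #ifdef __packed
    #undef __packed
    #endif
    #define __packed QEMU_PACKED

    /* hypothetical packed structure, used only to show the macro in action */
    struct example_record {
        uint8_t  type;
        uint32_t value;
    } __packed;

    _Static_assert(sizeof(struct example_record) == 5,
                   "packing removes the padding between type and value");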
Diffstat (limited to 'hw/intc')
-rw-r--r-- | hw/intc/pnv_xive.c   |  10
-rw-r--r-- | hw/intc/pnv_xive2.c  | 166
-rw-r--r-- | hw/intc/spapr_xive.c |   8
-rw-r--r-- | hw/intc/trace-events |   6
-rw-r--r-- | hw/intc/xive.c       | 205
-rw-r--r-- | hw/intc/xive2.c      | 690
6 files changed, 901 insertions, 184 deletions
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index b755ddf..ccbe95a 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -1,10 +1,9 @@ /* * QEMU PowerPC XIVE interrupt controller model * - * Copyright (c) 2017-2019, IBM Corporation. + * Copyright (c) 2017-2024, IBM Corporation. * - * This code is licensed under the GPL version 2 or later. See the - * COPYING file in the top-level directory. + * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" @@ -473,7 +472,7 @@ static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu) static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, + bool crowd, bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive *xive = PNV_XIVE(xptr); @@ -500,7 +499,8 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, * Check the thread context CAM lines and record matches. */ ring = xive_presenter_tctx_match(xptr, tctx, format, nvt_blk, - nvt_idx, cam_ignore, logic_serv); + nvt_idx, cam_ignore, + logic_serv); /* * Save the context and follow on to catch duplicates, that we * don't support yet. diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index 9ed7594..0b81dad 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -1,10 +1,9 @@ /* * QEMU PowerPC XIVE2 interrupt controller model (POWER10) * - * Copyright (c) 2019-2022, IBM Corporation. + * Copyright (c) 2019-2024, IBM Corporation. * - * This code is licensed under the GPL version 2 or later. See the - * COPYING file in the top-level directory. + * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" @@ -625,7 +624,7 @@ static bool pnv_xive2_is_cpu_enabled(PnvXive2 *xive, PowerPCCPU *cpu) static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, + bool crowd, bool cam_ignore, uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive2 *xive = PNV_XIVE2(xptr); @@ -656,25 +655,38 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, logic_serv); } else { ring = xive2_presenter_tctx_match(xptr, tctx, format, nvt_blk, - nvt_idx, cam_ignore, - logic_serv); + nvt_idx, crowd, cam_ignore, + logic_serv); } - /* - * Save the context and follow on to catch duplicates, - * that we don't support yet. - */ if (ring != -1) { - if (match->tctx) { + /* + * For VP-specific match, finding more than one is a + * problem. For group notification, it's possible. + */ + if (!cam_ignore && match->tctx) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a " "thread context NVT %x/%x\n", nvt_blk, nvt_idx); - return false; + /* Should set a FIR if we ever model it */ + return -1; + } + /* + * For a group notification, we need to know if the + * match is precluded first by checking the current + * thread priority. If the interrupt can be delivered, + * we always notify the first match (for now). 
+ */ + if (cam_ignore && + xive2_tm_irq_precluded(tctx, ring, priority)) { + match->precluded = true; + } else { + if (!match->tctx) { + match->ring = ring; + match->tctx = tctx; + } + count++; } - - match->ring = ring; - match->tctx = tctx; - count++; } } } @@ -693,6 +705,47 @@ static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) return cfg; } +static int pnv_xive2_broadcast(XivePresenter *xptr, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool ignore, uint8_t priority) +{ + PnvXive2 *xive = PNV_XIVE2(xptr); + PnvChip *chip = xive->chip; + int i, j; + bool gen1_tima_os = + xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pc = chip->cores[i]; + CPUCore *cc = CPU_CORE(pc); + + for (j = 0; j < cc->nr_threads; j++) { + PowerPCCPU *cpu = pc->threads[j]; + XiveTCTX *tctx; + int ring; + + if (!pnv_xive2_is_cpu_enabled(xive, cpu)) { + continue; + } + + tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); + + if (gen1_tima_os) { + ring = xive_presenter_tctx_match(xptr, tctx, 0, nvt_blk, + nvt_idx, ignore, 0); + } else { + ring = xive2_presenter_tctx_match(xptr, tctx, 0, nvt_blk, + nvt_idx, crowd, ignore, 0); + } + + if (ring != -1) { + xive2_tm_set_lsmfb(tctx, ring, priority); + } + } + } + return 0; +} + static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr) { return pnv_xive2_block_id(PNV_XIVE2(xrtr)); @@ -2149,21 +2202,40 @@ static const MemoryRegionOps pnv_xive2_tm_ops = { }, }; -static uint64_t pnv_xive2_nvc_read(void *opaque, hwaddr offset, +static uint64_t pnv_xive2_nvc_read(void *opaque, hwaddr addr, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); + XivePresenter *xptr = XIVE_PRESENTER(xive); + uint32_t page = addr >> xive->nvpg_shift; + uint16_t op = addr & 0xFFF; + uint8_t blk = pnv_xive2_block_id(xive); - xive2_error(xive, "NVC: invalid read @%"HWADDR_PRIx, offset); - return -1; + if (size != 2) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvc load size %d\n", + size); + return -1; + } + + return xive2_presenter_nvgc_backlog_op(xptr, true, blk, page, op, 1); } -static void pnv_xive2_nvc_write(void *opaque, hwaddr offset, +static void pnv_xive2_nvc_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); + XivePresenter *xptr = XIVE_PRESENTER(xive); + uint32_t page = addr >> xive->nvc_shift; + uint16_t op = addr & 0xFFF; + uint8_t blk = pnv_xive2_block_id(xive); - xive2_error(xive, "NVC: invalid write @%"HWADDR_PRIx, offset); + if (size != 1) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvc write size %d\n", + size); + return; + } + + (void)xive2_presenter_nvgc_backlog_op(xptr, true, blk, page, op, val); } static const MemoryRegionOps pnv_xive2_nvc_ops = { @@ -2171,30 +2243,63 @@ static const MemoryRegionOps pnv_xive2_nvc_ops = { .write = pnv_xive2_nvc_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { - .min_access_size = 8, + .min_access_size = 1, .max_access_size = 8, }, .impl = { - .min_access_size = 8, + .min_access_size = 1, .max_access_size = 8, }, }; -static uint64_t pnv_xive2_nvpg_read(void *opaque, hwaddr offset, +static uint64_t pnv_xive2_nvpg_read(void *opaque, hwaddr addr, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); + XivePresenter *xptr = XIVE_PRESENTER(xive); + uint32_t page = addr >> xive->nvpg_shift; + uint16_t op = addr & 0xFFF; + uint32_t index = page >> 1; + uint8_t blk = pnv_xive2_block_id(xive); - xive2_error(xive, "NVPG: invalid read @%"HWADDR_PRIx, offset); - return -1; + if (size != 2) { + qemu_log_mask(LOG_GUEST_ERROR, 
"XIVE: invalid nvpg load size %d\n", + size); + return -1; + } + + if (page % 2) { + /* odd page - NVG */ + return xive2_presenter_nvgc_backlog_op(xptr, false, blk, index, op, 1); + } else { + /* even page - NVP */ + return xive2_presenter_nvp_backlog_op(xptr, blk, index, op); + } } -static void pnv_xive2_nvpg_write(void *opaque, hwaddr offset, +static void pnv_xive2_nvpg_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); + XivePresenter *xptr = XIVE_PRESENTER(xive); + uint32_t page = addr >> xive->nvpg_shift; + uint16_t op = addr & 0xFFF; + uint32_t index = page >> 1; + uint8_t blk = pnv_xive2_block_id(xive); - xive2_error(xive, "NVPG: invalid write @%"HWADDR_PRIx, offset); + if (size != 1) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid nvpg write size %d\n", + size); + return; + } + + if (page % 2) { + /* odd page - NVG */ + (void)xive2_presenter_nvgc_backlog_op(xptr, false, blk, index, op, val); + } else { + /* even page - NVP */ + (void)xive2_presenter_nvp_backlog_op(xptr, blk, index, op); + } } static const MemoryRegionOps pnv_xive2_nvpg_ops = { @@ -2202,11 +2307,11 @@ static const MemoryRegionOps pnv_xive2_nvpg_ops = { .write = pnv_xive2_nvpg_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { - .min_access_size = 8, + .min_access_size = 1, .max_access_size = 8, }, .impl = { - .min_access_size = 8, + .min_access_size = 1, .max_access_size = 8, }, }; @@ -2432,6 +2537,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data) xpc->match_nvt = pnv_xive2_match_nvt; xpc->get_config = pnv_xive2_presenter_get_config; + xpc->broadcast = pnv_xive2_broadcast; }; static const TypeInfo pnv_xive2_info = { diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index a764c0b..ce734b0 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -1,10 +1,9 @@ /* * QEMU PowerPC sPAPR XIVE interrupt controller model * - * Copyright (c) 2017-2018, IBM Corporation. + * Copyright (c) 2017-2024, IBM Corporation. * - * This code is licensed under the GPL version 2 or later. See the - * COPYING file in the top-level directory. 
+ * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" @@ -431,7 +430,8 @@ static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, + bool crowd, bool cam_ignore, + uint8_t priority, uint32_t logic_serv, XiveTCTXMatch *match) { CPUState *cs; diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 913197a..0ba9a02 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -283,9 +283,13 @@ xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "EN xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x" xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64 xive_tctx_tm_read(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64 -xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring) "found NVT 0x%x/0x%x ring=0x%x" +xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring, uint8_t group_level) "found NVT 0x%x/0x%x ring=0x%x group_level=%d" xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x%"PRIx64 +# xive2.c +xive_nvp_backlog_op(uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint8_t rc) "NVP 0x%x/0x%x operation=%d priority=%d rc=%d" +xive_nvgc_backlog_op(bool c, uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint32_t rc) "NVGC crowd=%d 0x%x/0x%x operation=%d priority=%d rc=%d" + # pnv_xive.c pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64 diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 139cfdf..c77df2c 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -3,8 +3,7 @@ * * Copyright (c) 2017-2018, IBM Corporation. * - * This code is licensed under the GPL version 2 or later. See the - * COPYING file in the top-level directory. + * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" @@ -27,28 +26,6 @@ * XIVE Thread Interrupt Management context */ -/* - * Convert an Interrupt Pending Buffer (IPB) register to a Pending - * Interrupt Priority Register (PIPR), which contains the priority of - * the most favored pending notification. - */ -static uint8_t ipb_to_pipr(uint8_t ibp) -{ - return ibp ? 
clz32((uint32_t)ibp << 24) : 0xff; -} - -static uint8_t exception_mask(uint8_t ring) -{ - switch (ring) { - case TM_QW1_OS: - return TM_QW1_NSR_EO; - case TM_QW3_HV_PHYS: - return TM_QW3_NSR_HE; - default: - g_assert_not_reached(); - } -} - static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring) { switch (ring) { @@ -68,11 +45,10 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) { uint8_t *regs = &tctx->regs[ring]; uint8_t nsr = regs[TM_NSR]; - uint8_t mask = exception_mask(ring); qemu_irq_lower(xive_tctx_output(tctx, ring)); - if (regs[TM_NSR] & mask) { + if (regs[TM_NSR] != 0) { uint8_t cppr = regs[TM_PIPR]; uint8_t alt_ring; uint8_t *alt_regs; @@ -87,11 +63,18 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) regs[TM_CPPR] = cppr; - /* Reset the pending buffer bit */ - alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); + /* + * If the interrupt was for a specific VP, reset the pending + * buffer bit, otherwise clear the logical server indicator + */ + if (regs[TM_NSR] & TM_NSR_GRP_LVL) { + regs[TM_NSR] &= ~TM_NSR_GRP_LVL; + } else { + alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); + } - /* Drop Exception bit */ - regs[TM_NSR] &= ~mask; + /* Drop the exception bit and any group/crowd */ + regs[TM_NSR] = 0; trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring, alt_regs[TM_IPB], regs[TM_PIPR], @@ -101,7 +84,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) return ((uint64_t)nsr << 8) | regs[TM_CPPR]; } -static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring) +void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level) { /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; @@ -111,13 +94,13 @@ static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring) if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) { switch (ring) { case TM_QW1_OS: - regs[TM_NSR] |= TM_QW1_NSR_EO; + regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F); break; case TM_QW2_HV_POOL: - alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6); + alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F); break; case TM_QW3_HV_PHYS: - regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6); + regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F); break; default: g_assert_not_reached(); @@ -159,7 +142,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) * Recompute the PIPR based on local pending interrupts. The PHYS * ring must take the minimum of both the PHYS and POOL PIPR values. 
*/ - pipr_min = ipb_to_pipr(regs[TM_IPB]); + pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); ring_min = ring; /* PHYS updates also depend on POOL values */ @@ -169,7 +152,7 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) /* POOL values only matter if POOL ctx is valid */ if (pool_regs[TM_WORD2] & 0x80) { - uint8_t pool_pipr = ipb_to_pipr(pool_regs[TM_IPB]); + uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]); /* * Determine highest priority interrupt and @@ -185,17 +168,27 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) regs[TM_PIPR] = pipr_min; /* CPPR has changed, check if we need to raise a pending exception */ - xive_tctx_notify(tctx, ring_min); + xive_tctx_notify(tctx, ring_min, 0); } -void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb) -{ +void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority, + uint8_t group_level) + { + /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ + uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; + uint8_t *alt_regs = &tctx->regs[alt_ring]; uint8_t *regs = &tctx->regs[ring]; - regs[TM_IPB] |= ipb; - regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); - xive_tctx_notify(tctx, ring); -} + if (group_level == 0) { + /* VP-specific */ + regs[TM_IPB] |= xive_priority_to_ipb(priority); + alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]); + } else { + /* VP-group */ + alt_regs[TM_PIPR] = xive_priority_to_pipr(priority); + } + xive_tctx_notify(tctx, ring, group_level); + } /* * XIVE Thread Interrupt Management Area (TIMA) @@ -411,13 +404,13 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx, } /* - * Adjust the IPB to allow a CPU to process event queues of other + * Adjust the PIPR to allow a CPU to process event queues of other * priorities during one physical interrupt cycle. */ static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff)); + xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -495,16 +488,20 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx, /* Reset the NVT value */ nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0); xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4); + + uint8_t *regs = &tctx->regs[TM_QW1_OS]; + regs[TM_IPB] |= ipb; } + /* - * Always call xive_tctx_ipb_update(). Even if there were no + * Always call xive_tctx_pipr_update(). Even if there were no * escalation triggered, there could be a pending interrupt which * was saved when the context was pulled and that we need to take * into account by recalculating the PIPR (which is not * saved/restored). * It will also raise the External interrupt signal if needed. 
*/ - xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb); + xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */ } /* @@ -592,7 +589,7 @@ static const XiveTmOp xive2_tm_operations[] = { * MMIOs below 2K : raw values and special operations without side * effects */ - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive2_tm_set_os_cppr, NULL }, { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, NULL }, @@ -600,7 +597,7 @@ static const XiveTmOp xive2_tm_operations[] = { NULL }, { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, xive_tm_set_os_lgs, NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive2_tm_set_hv_cppr, NULL }, { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL }, @@ -841,9 +838,9 @@ void xive_tctx_reset(XiveTCTX *tctx) * CPPR is first set. */ tctx->regs[TM_QW1_OS + TM_PIPR] = - ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]); + xive_ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]); tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] = - ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]); + xive_ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]); } static void xive_tctx_realize(DeviceState *dev, Error **errp) @@ -1658,6 +1655,54 @@ static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx) return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f)); } +uint32_t xive_get_vpgroup_size(uint32_t nvp_index) +{ + /* + * Group size is a power of 2. The position of the first 0 + * (starting with the least significant bits) in the NVP index + * gives the size of the group. + */ + return 1 << (ctz32(~nvp_index) + 1); +} + +static uint8_t xive_get_group_level(bool crowd, bool ignore, + uint32_t nvp_blk, uint32_t nvp_index) +{ + uint8_t level; + + if (!ignore) { + g_assert(!crowd); + return 0; + } + + level = (ctz32(~nvp_index) + 1) & 0b1111; + if (crowd) { + uint32_t blk; + + /* crowd level is bit position of first 0 from the right in nvp_blk */ + blk = ctz32(~nvp_blk) + 1; + + /* + * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported. + * HW will encode level 4 as the value 3. See xive2_pgofnext(). + */ + switch (level) { + case 1: + case 2: + break; + case 4: + blk = 3; + break; + default: + g_assert_not_reached(); + } + + /* Crowd level bits reside in upper 2 bits of the 6 bit group level */ + level |= blk << 4; + } + return level; +} + /* * The thread context register words are in big-endian format. */ @@ -1724,31 +1769,41 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, /* * This is our simple Xive Presenter Engine model. It is merged in the * Router as it does not require an extra object. - * - * It receives notification requests sent by the IVRE to find one - * matching NVT (or more) dispatched on the processor threads. In case - * of a single NVT notification, the process is abbreviated and the - * thread is signaled if a match is found. In case of a logical server - * notification (bits ignored at the end of the NVT identifier), the - * IVPE and IVRE select a winning thread using different filters. This - * involves 2 or 3 exchanges on the PowerBus that the model does not - * support. 
- * - * The parameters represent what is sent on the PowerBus */ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, - uint32_t logic_serv) + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, bool *precluded) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb); - XiveTCTXMatch match = { .tctx = NULL, .ring = 0 }; + XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false }; + uint8_t group_level; int count; /* - * Ask the machine to scan the interrupt controllers for a match + * Ask the machine to scan the interrupt controllers for a match. + * + * For VP-specific notification, we expect at most one match and + * one call to the presenters is all we need (abbreviated notify + * sequence documented by the architecture). + * + * For VP-group notification, match_nvt() is the equivalent of the + * "histogram" and "poll" commands sent to the power bus to the + * presenters. 'count' could be more than one, but we always + * select the first match for now. 'precluded' tells if (at least) + * one thread matches but can't take the interrupt now because + * it's running at a more favored priority. We return the + * information to the router so that it can take appropriate + * actions (backlog, escalation, broadcast, etc...) + * + * If we were to implement a better way of dispatching the + * interrupt in case of multiple matches (instead of the first + * match), we would need a heuristic to elect a thread (for + * example, the hardware keeps track of an 'age' in the TIMA) and + * a new command to the presenters (the equivalent of the "assign" + * power bus command in the documented full notify sequence. */ - count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore, + count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore, priority, logic_serv, &match); if (count < 0) { return false; @@ -1756,9 +1811,11 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, /* handle CPU exception delivery */ if (count) { - trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring); - xive_tctx_ipb_update(match.tctx, match.ring, - xive_priority_to_ipb(priority)); + group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx); + trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level); + xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level); + } else { + *precluded = match.precluded; } return !!count; @@ -1798,7 +1855,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) uint8_t nvt_blk; uint32_t nvt_idx; XiveNVT nvt; - bool found; + bool found, precluded; uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); @@ -1879,10 +1936,12 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) } found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx, + false /* crowd */, xive_get_field32(END_W7_F0_IGNORE, end.w7), priority, - xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7)); - + xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7), + &precluded); + /* we don't support VP-group notification on P9, so precluded is not used */ /* TODO: Auto EOI. */ if (found) { diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index fc5aed3..f8ef615 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -1,10 +1,9 @@ /* * QEMU PowerPC XIVE2 interrupt controller model (POWER10) * - * Copyright (c) 2019-2022, IBM Corporation.. + * Copyright (c) 2019-2024, IBM Corporation.. 
* - * This code is licensed under the GPL version 2 or later. See the - * COPYING file in the top-level directory. + * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" @@ -18,6 +17,7 @@ #include "hw/ppc/xive.h" #include "hw/ppc/xive2.h" #include "hw/ppc/xive2_regs.h" +#include "trace.h" uint32_t xive2_router_get_config(Xive2Router *xrtr) { @@ -54,7 +54,8 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, uint8_t priority) /* * The per-priority backlog counters are 24-bit and the structure - * is stored in big endian + * is stored in big endian. NVGC is 32-bytes long, so 24-bytes from + * w2, which fits 8 priorities * 24-bits per priority. */ ptr = (uint8_t *)&nvgc->w2 + priority * 3; for (i = 0; i < 3; i++, ptr++) { @@ -63,6 +64,117 @@ static uint32_t xive2_nvgc_get_backlog(Xive2Nvgc *nvgc, uint8_t priority) return val; } +static void xive2_nvgc_set_backlog(Xive2Nvgc *nvgc, uint8_t priority, + uint32_t val) +{ + uint8_t *ptr, i; + uint32_t shift; + + if (priority > 7) { + return; + } + + if (val > 0xFFFFFF) { + val = 0xFFFFFF; + } + /* + * The per-priority backlog counters are 24-bit and the structure + * is stored in big endian + */ + ptr = (uint8_t *)&nvgc->w2 + priority * 3; + for (i = 0; i < 3; i++, ptr++) { + shift = 8 * (2 - i); + *ptr = (val >> shift) & 0xFF; + } +} + +uint64_t xive2_presenter_nvgc_backlog_op(XivePresenter *xptr, + bool crowd, + uint8_t blk, uint32_t idx, + uint16_t offset, uint16_t val) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t priority = GETFIELD(NVx_BACKLOG_PRIO, offset); + uint8_t op = GETFIELD(NVx_BACKLOG_OP, offset); + Xive2Nvgc nvgc; + uint32_t count, old_count; + + if (xive2_router_get_nvgc(xrtr, crowd, blk, idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No %s %x/%x\n", + crowd ? 
"NVC" : "NVG", blk, idx); + return -1; + } + if (!xive2_nvgc_is_valid(&nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVG %x/%x\n", blk, idx); + return -1; + } + + old_count = xive2_nvgc_get_backlog(&nvgc, priority); + count = old_count; + /* + * op: + * 0b00 => increment + * 0b01 => decrement + * 0b1- => read + */ + if (op == 0b00 || op == 0b01) { + if (op == 0b00) { + count += val; + } else { + if (count > val) { + count -= val; + } else { + count = 0; + } + } + xive2_nvgc_set_backlog(&nvgc, priority, count); + xive2_router_write_nvgc(xrtr, crowd, blk, idx, &nvgc); + } + trace_xive_nvgc_backlog_op(crowd, blk, idx, op, priority, old_count); + return old_count; +} + +uint64_t xive2_presenter_nvp_backlog_op(XivePresenter *xptr, + uint8_t blk, uint32_t idx, + uint16_t offset) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t priority = GETFIELD(NVx_BACKLOG_PRIO, offset); + uint8_t op = GETFIELD(NVx_BACKLOG_OP, offset); + Xive2Nvp nvp; + uint8_t ipb, old_ipb, rc; + + if (xive2_router_get_nvp(xrtr, blk, idx, &nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", blk, idx); + return -1; + } + if (!xive2_nvp_is_valid(&nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVP %x/%x\n", blk, idx); + return -1; + } + + old_ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2); + ipb = old_ipb; + /* + * op: + * 0b00 => set priority bit + * 0b01 => reset priority bit + * 0b1- => read + */ + if (op == 0b00 || op == 0b01) { + if (op == 0b00) { + ipb |= xive_priority_to_ipb(priority); + } else { + ipb &= ~xive_priority_to_ipb(priority); + } + nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb); + xive2_router_write_nvp(xrtr, blk, idx, &nvp, 2); + } + rc = !!(old_ipb & xive_priority_to_ipb(priority)); + trace_xive_nvp_backlog_op(blk, idx, op, priority, rc); + return rc; +} + void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf) { if (!xive2_eas_is_valid(eas)) { @@ -114,8 +226,8 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); uint32_t qentries = 1 << (qsize + 10); - uint32_t nvp_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6); - uint32_t nvp_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6); + uint32_t nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6); + uint32_t nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6); uint8_t priority = xive_get_field32(END2_W7_F0_PRIORITY, end->w7); uint8_t pq; @@ -144,7 +256,7 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) xive2_end_is_firmware2(end) ? 'F' : '-', xive2_end_is_ignore(end) ? 'i' : '-', xive2_end_is_crowd(end) ? 'c' : '-', - priority, nvp_blk, nvp_idx); + priority, nvx_blk, nvx_idx); if (qaddr_base) { g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d", @@ -255,6 +367,115 @@ static void xive2_end_enqueue(Xive2End *end, uint32_t data) end->w1 = xive_set_field32(END2_W1_PAGE_OFF, end->w1, qindex); } +static void xive2_pgofnext(uint8_t *nvgc_blk, uint32_t *nvgc_idx, + uint8_t next_level) +{ + uint32_t mask, next_idx; + uint8_t next_blk; + + /* + * Adjust the block and index of a VP for the next group/crowd + * size (PGofFirst/PGofNext field in the NVP and NVGC structures). + * + * The 6-bit group level is split into a 2-bit crowd and 4-bit + * group levels. Encoding is similar. However, we don't support + * crowd size of 8. So a crowd level of 0b11 is bumped to a crowd + * size of 16. 
+ */ + next_blk = NVx_CROWD_LVL(next_level); + if (next_blk == 3) { + next_blk = 4; + } + mask = (1 << next_blk) - 1; + *nvgc_blk &= ~mask; + *nvgc_blk |= mask >> 1; + + next_idx = NVx_GROUP_LVL(next_level); + mask = (1 << next_idx) - 1; + *nvgc_idx &= ~mask; + *nvgc_idx |= mask >> 1; +} + +/* + * Scan the group chain and return the highest priority and group + * level of pending group interrupts. + */ +static uint8_t xive2_presenter_backlog_scan(XivePresenter *xptr, + uint8_t nvx_blk, uint32_t nvx_idx, + uint8_t first_group, + uint8_t *out_level) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint32_t nvgc_idx; + uint32_t current_level, count; + uint8_t nvgc_blk, prio; + Xive2Nvgc nvgc; + + for (prio = 0; prio <= XIVE_PRIORITY_MAX; prio++) { + current_level = first_group & 0x3F; + nvgc_blk = nvx_blk; + nvgc_idx = nvx_idx; + + while (current_level) { + xive2_pgofnext(&nvgc_blk, &nvgc_idx, current_level); + + if (xive2_router_get_nvgc(xrtr, NVx_CROWD_LVL(current_level), + nvgc_blk, nvgc_idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVGC %x/%x\n", + nvgc_blk, nvgc_idx); + return 0xFF; + } + if (!xive2_nvgc_is_valid(&nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVGC %x/%x\n", + nvgc_blk, nvgc_idx); + return 0xFF; + } + + count = xive2_nvgc_get_backlog(&nvgc, prio); + if (count) { + *out_level = current_level; + return prio; + } + current_level = xive_get_field32(NVGC2_W0_PGONEXT, nvgc.w0) & 0x3F; + } + } + return 0xFF; +} + +static void xive2_presenter_backlog_decr(XivePresenter *xptr, + uint8_t nvx_blk, uint32_t nvx_idx, + uint8_t group_prio, + uint8_t group_level) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint32_t nvgc_idx, count; + uint8_t nvgc_blk; + Xive2Nvgc nvgc; + + nvgc_blk = nvx_blk; + nvgc_idx = nvx_idx; + xive2_pgofnext(&nvgc_blk, &nvgc_idx, group_level); + + if (xive2_router_get_nvgc(xrtr, NVx_CROWD_LVL(group_level), + nvgc_blk, nvgc_idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVGC %x/%x\n", + nvgc_blk, nvgc_idx); + return; + } + if (!xive2_nvgc_is_valid(&nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid NVGC %x/%x\n", + nvgc_blk, nvgc_idx); + return; + } + count = xive2_nvgc_get_backlog(&nvgc, group_prio); + if (!count) { + return; + } + xive2_nvgc_set_backlog(&nvgc, group_prio, count - 1); + xive2_router_write_nvgc(xrtr, NVx_CROWD_LVL(group_level), + nvgc_blk, nvgc_idx, &nvgc); +} + /* * XIVE Thread Interrupt Management Area (TIMA) - Gen2 mode * @@ -313,7 +534,19 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, regs[TM_IPB]); nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, regs[TM_CPPR]); - nvp.w2 = xive_set_field32(NVP2_W2_LSMFB, nvp.w2, regs[TM_LSMFB]); + if (nvp.w0 & NVP2_W0_L) { + /* + * Typically not used. 
If LSMFB is restored with 0, it will + * force a backlog rescan + */ + nvp.w2 = xive_set_field32(NVP2_W2_LSMFB, nvp.w2, regs[TM_LSMFB]); + } + if (nvp.w0 & NVP2_W0_G) { + nvp.w2 = xive_set_field32(NVP2_W2_LGS, nvp.w2, regs[TM_LGS]); + } + if (nvp.w0 & NVP2_W0_T) { + nvp.w2 = xive_set_field32(NVP2_W2_T, nvp.w2, regs[TM_T]); + } xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); nvp.w1 = xive_set_field32(NVP2_W1_CO, nvp.w1, 0); @@ -527,7 +760,9 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 2); tctx->regs[TM_QW1_OS + TM_CPPR] = cppr; - /* we don't model LSMFB */ + tctx->regs[TM_QW1_OS + TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2); + tctx->regs[TM_QW1_OS + TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2); + tctx->regs[TM_QW1_OS + TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2); nvp->w1 = xive_set_field32(NVP2_W1_CO, nvp->w1, 1); nvp->w1 = xive_set_field32(NVP2_W1_CO_THRID_VALID, nvp->w1, 1); @@ -550,8 +785,15 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, uint8_t nvp_blk, uint32_t nvp_idx, bool do_restore) { - Xive2Nvp nvp; + XivePresenter *xptr = XIVE_PRESENTER(xrtr); uint8_t ipb; + uint8_t backlog_level; + uint8_t group_level; + uint8_t first_group; + uint8_t backlog_prio; + uint8_t group_prio; + uint8_t *regs = &tctx->regs[TM_QW1_OS]; + Xive2Nvp nvp; /* * Grab the associated thread interrupt context registers in the @@ -580,15 +822,29 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0); xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); } + regs[TM_IPB] |= ipb; + backlog_prio = xive_ipb_to_pipr(ipb); + backlog_level = 0; + + first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0); + if (first_group && regs[TM_LSMFB] < backlog_prio) { + group_prio = xive2_presenter_backlog_scan(xptr, nvp_blk, nvp_idx, + first_group, &group_level); + regs[TM_LSMFB] = group_prio; + if (regs[TM_LGS] && group_prio < backlog_prio) { + /* VP can take a group interrupt */ + xive2_presenter_backlog_decr(xptr, nvp_blk, nvp_idx, + group_prio, group_level); + backlog_prio = group_prio; + backlog_level = group_level; + } + } + /* - * Always call xive_tctx_ipb_update(). Even if there were no - * escalation triggered, there could be a pending interrupt which - * was saved when the context was pulled and that we need to take - * into account by recalculating the PIPR (which is not - * saved/restored). - * It will also raise the External interrupt signal if needed. + * Compute the PIPR based on the restored state. + * It will raise the External interrupt signal if needed. 
*/ - xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb); + xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level); } /* @@ -630,6 +886,172 @@ void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, } } +static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring, + uint32_t *nvp_blk, uint32_t *nvp_idx) +{ + uint32_t w2, cam; + + w2 = xive_tctx_word2(&tctx->regs[ring]); + switch (ring) { + case TM_QW1_OS: + if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) { + return -1; + } + cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2); + break; + case TM_QW2_HV_POOL: + if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) { + return -1; + } + cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2); + break; + case TM_QW3_HV_PHYS: + if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) { + return -1; + } + cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx); + break; + default: + return -1; + } + *nvp_blk = xive2_nvp_blk(cam); + *nvp_idx = xive2_nvp_idx(cam); + return 0; +} + +static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) +{ + uint8_t *regs = &tctx->regs[ring]; + Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); + uint8_t old_cppr, backlog_prio, first_group, group_level = 0; + uint8_t pipr_min, lsmfb_min, ring_min; + bool group_enabled; + uint32_t nvp_blk, nvp_idx; + Xive2Nvp nvp; + int rc; + + trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring, + regs[TM_IPB], regs[TM_PIPR], + cppr, regs[TM_NSR]); + + if (cppr > XIVE_PRIORITY_MAX) { + cppr = 0xff; + } + + old_cppr = regs[TM_CPPR]; + regs[TM_CPPR] = cppr; + + /* + * Recompute the PIPR based on local pending interrupts. It will + * be adjusted below if needed in case of pending group interrupts. + */ + pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); + group_enabled = !!regs[TM_LGS]; + lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff; + ring_min = ring; + + /* PHYS updates also depend on POOL values */ + if (ring == TM_QW3_HV_PHYS) { + uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL]; + + /* POOL values only matter if POOL ctx is valid */ + if (pregs[TM_WORD2] & 0x80) { + + uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]); + uint8_t pool_lsmfb = pregs[TM_LSMFB]; + + /* + * Determine highest priority interrupt and + * remember which ring has it. + */ + if (pool_pipr < pipr_min) { + pipr_min = pool_pipr; + if (pool_pipr < lsmfb_min) { + ring_min = TM_QW2_HV_POOL; + } + } + + /* Values needed for group priority calculation */ + if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) { + group_enabled = true; + lsmfb_min = pool_lsmfb; + if (lsmfb_min < pipr_min) { + ring_min = TM_QW2_HV_POOL; + } + } + } + } + regs[TM_PIPR] = pipr_min; + + rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx); + if (rc) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n"); + return; + } + + if (cppr < old_cppr) { + /* + * FIXME: check if there's a group interrupt being presented + * and if the new cppr prevents it. If so, then the group + * interrupt needs to be re-added to the backlog and + * re-triggered (see re-trigger END info in the NVGC + * structure) + */ + } + + if (group_enabled && + lsmfb_min < cppr && + lsmfb_min < regs[TM_PIPR]) { + /* + * Thread has seen a group interrupt with a higher priority + * than the new cppr or pending local interrupt. 
Check the + * backlog + */ + if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + if (!xive2_nvp_is_valid(&nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0); + if (!first_group) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + backlog_prio = xive2_presenter_backlog_scan(tctx->xptr, + nvp_blk, nvp_idx, + first_group, &group_level); + tctx->regs[ring_min + TM_LSMFB] = backlog_prio; + if (backlog_prio != 0xFF) { + xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx, + backlog_prio, group_level); + regs[TM_PIPR] = backlog_prio; + } + } + /* CPPR has changed, check if we need to raise a pending exception */ + xive_tctx_notify(tctx, ring_min, group_level); +} + +void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff); +} + +void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff); +} + static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target) { uint8_t *regs = &tctx->regs[ring]; @@ -723,13 +1145,46 @@ int xive2_router_write_nvgc(Xive2Router *xrtr, bool crowd, return xrc->write_nvgc(xrtr, crowd, nvgc_blk, nvgc_idx, nvgc); } +static bool xive2_vp_match_mask(uint32_t cam1, uint32_t cam2, + uint32_t vp_mask) +{ + return (cam1 & vp_mask) == (cam2 & vp_mask); +} + +static uint8_t xive2_get_vp_block_mask(uint32_t nvt_blk, bool crowd) +{ + uint8_t size, block_mask = 0b1111; + + /* 3 supported crowd sizes: 2, 4, 16 */ + if (crowd) { + size = xive_get_vpgroup_size(nvt_blk); + if (size == 8) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd size of 8n"); + return block_mask; + } + block_mask &= ~(size - 1); + } + return block_mask; +} + +static uint32_t xive2_get_vp_index_mask(uint32_t nvt_index, bool cam_ignore) +{ + uint32_t index_mask = 0xFFFFFF; /* 24 bits */ + + if (cam_ignore) { + index_mask &= ~(xive_get_vpgroup_size(nvt_index) - 1); + } + return index_mask; +} + /* * The thread context register words are in big-endian format. */ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint32_t logic_serv) + bool crowd, bool cam_ignore, + uint32_t logic_serv) { uint32_t cam = xive2_nvp_cam_line(nvt_blk, nvt_idx); uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]); @@ -737,44 +1192,51 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]); uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]); - /* - * TODO (PowerNV): ignore mode. The low order bits of the NVT - * identifier are ignored in the "CAM" match. 
- */ + uint32_t index_mask, vp_mask; + uint8_t block_mask; if (format == 0) { - if (cam_ignore == true) { - /* - * F=0 & i=1: Logical server notification (bits ignored at - * the end of the NVT identifier) - */ - qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n", - nvt_blk, nvt_idx); - return -1; - } + /* + * i=0: Specific NVT notification + * i=1: VP-group notification (bits ignored at the end of the + * NVT identifier) + */ + block_mask = xive2_get_vp_block_mask(nvt_blk, crowd); + index_mask = xive2_get_vp_index_mask(nvt_idx, cam_ignore); + vp_mask = xive2_nvp_cam_line(block_mask, index_mask); - /* F=0 & i=0: Specific NVT notification */ + /* For VP-group notifications, threads with LGS=0 are excluded */ /* PHYS ring */ if ((be32_to_cpu(qw3w2) & TM2_QW3W2_VT) && - cam == xive2_tctx_hw_cam_line(xptr, tctx)) { + !(cam_ignore && tctx->regs[TM_QW3_HV_PHYS + TM_LGS] == 0) && + xive2_vp_match_mask(cam, + xive2_tctx_hw_cam_line(xptr, tctx), + vp_mask)) { return TM_QW3_HV_PHYS; } /* HV POOL ring */ if ((be32_to_cpu(qw2w2) & TM2_QW2W2_VP) && - cam == xive_get_field32(TM2_QW2W2_POOL_CAM, qw2w2)) { + !(cam_ignore && tctx->regs[TM_QW2_HV_POOL + TM_LGS] == 0) && + xive2_vp_match_mask(cam, + xive_get_field32(TM2_QW2W2_POOL_CAM, qw2w2), + vp_mask)) { return TM_QW2_HV_POOL; } /* OS ring */ if ((be32_to_cpu(qw1w2) & TM2_QW1W2_VO) && - cam == xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2)) { + !(cam_ignore && tctx->regs[TM_QW1_OS + TM_LGS] == 0) && + xive2_vp_match_mask(cam, + xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2), + vp_mask)) { return TM_QW1_OS; } } else { /* F=1 : User level Event-Based Branch (EBB) notification */ + /* FIXME: what if cam_ignore and LGS = 0 ? */ /* USER ring */ if ((be32_to_cpu(qw1w2) & TM2_QW1W2_VO) && (cam == xive_get_field32(TM2_QW1W2_OS_CAM, qw1w2)) && @@ -786,6 +1248,37 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, return -1; } +bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority) +{ + /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ + uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; + uint8_t *alt_regs = &tctx->regs[alt_ring]; + + /* + * The xive2_presenter_tctx_match() above tells if there's a match + * but for VP-group notification, we still need to look at the + * priority to know if the thread can take the interrupt now or if + * it is precluded. + */ + if (priority < alt_regs[TM_CPPR]) { + return false; + } + return true; +} + +void xive2_tm_set_lsmfb(XiveTCTX *tctx, int ring, uint8_t priority) +{ + uint8_t *regs = &tctx->regs[ring]; + + /* + * Called by the router during a VP-group notification when the + * thread matches but can't take the interrupt because it's + * already running at a more favored priority. It then stores the + * new interrupt priority in the LSMFB field. 
+ */ + regs[TM_LSMFB] = priority; +} + static void xive2_router_realize(DeviceState *dev, Error **errp) { Xive2Router *xrtr = XIVE2_ROUTER(dev); @@ -825,10 +1318,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, Xive2End end; uint8_t priority; uint8_t format; - bool found; - Xive2Nvp nvp; - uint8_t nvp_blk; - uint32_t nvp_idx; + bool found, precluded; + uint8_t nvx_blk; + uint32_t nvx_idx; /* END cache lookup */ if (xive2_router_get_end(xrtr, end_blk, end_idx, &end)) { @@ -843,6 +1335,12 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } + if (xive2_end_is_crowd(&end) & !xive2_end_is_ignore(&end)) { + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: invalid END, 'crowd' bit requires 'ignore' bit\n"); + return; + } + if (xive2_end_is_enqueue(&end)) { xive2_end_enqueue(&end, end_data); /* Enqueuing event data modifies the EQ toggle and index */ @@ -887,26 +1385,14 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, /* * Follows IVPE notification */ - nvp_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6); - nvp_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6); + nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6); + nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6); - /* NVP cache lookup */ - if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVP %x/%x\n", - nvp_blk, nvp_idx); - return; - } - - if (!xive2_nvp_is_valid(&nvp)) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVP %x/%x is invalid\n", - nvp_blk, nvp_idx); - return; - } - - found = xive_presenter_notify(xrtr->xfb, format, nvp_blk, nvp_idx, - xive2_end_is_ignore(&end), + found = xive_presenter_notify(xrtr->xfb, format, nvx_blk, nvx_idx, + xive2_end_is_crowd(&end), xive2_end_is_ignore(&end), priority, - xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7)); + xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7), + &precluded); /* TODO: Auto EOI. */ @@ -917,10 +1403,9 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, /* * If no matching NVP is dispatched on a HW thread : * - specific VP: update the NVP structure if backlog is activated - * - logical server : forward request to IVPE (not supported) + * - VP-group: update the backlog counter for that priority in the NVG */ if (xive2_end_is_backlog(&end)) { - uint8_t ipb; if (format == 1) { qemu_log_mask(LOG_GUEST_ERROR, @@ -929,19 +1414,82 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - /* - * Record the IPB in the associated NVP structure for later - * use. The presenter will resend the interrupt when the vCPU - * is dispatched again on a HW thread. - */ - ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) | - xive_priority_to_ipb(priority); - nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb); - xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); + if (!xive2_end_is_ignore(&end)) { + uint8_t ipb; + Xive2Nvp nvp; - /* - * On HW, follows a "Broadcast Backlog" to IVPEs - */ + /* NVP cache lookup */ + if (xive2_router_get_nvp(xrtr, nvx_blk, nvx_idx, &nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVP %x/%x\n", + nvx_blk, nvx_idx); + return; + } + + if (!xive2_nvp_is_valid(&nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVP %x/%x is invalid\n", + nvx_blk, nvx_idx); + return; + } + + /* + * Record the IPB in the associated NVP structure for later + * use. The presenter will resend the interrupt when the vCPU + * is dispatched again on a HW thread. 
+ */ + ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2) | + xive_priority_to_ipb(priority); + nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, ipb); + xive2_router_write_nvp(xrtr, nvx_blk, nvx_idx, &nvp, 2); + } else { + Xive2Nvgc nvgc; + uint32_t backlog; + bool crowd; + + crowd = xive2_end_is_crowd(&end); + + /* + * For groups and crowds, the per-priority backlog + * counters are stored in the NVG/NVC structures + */ + if (xive2_router_get_nvgc(xrtr, crowd, + nvx_blk, nvx_idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no %s %x/%x\n", + crowd ? "NVC" : "NVG", nvx_blk, nvx_idx); + return; + } + + if (!xive2_nvgc_is_valid(&nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVG %x/%x is invalid\n", + nvx_blk, nvx_idx); + return; + } + + /* + * Increment the backlog counter for that priority. + * We only call broadcast the first time the counter is + * incremented. broadcast will set the LSMFB field of the TIMA of + * relevant threads so that they know an interrupt is pending. + */ + backlog = xive2_nvgc_get_backlog(&nvgc, priority) + 1; + xive2_nvgc_set_backlog(&nvgc, priority, backlog); + xive2_router_write_nvgc(xrtr, crowd, nvx_blk, nvx_idx, &nvgc); + + if (backlog == 1) { + XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb); + xfc->broadcast(xrtr->xfb, nvx_blk, nvx_idx, + xive2_end_is_crowd(&end), + xive2_end_is_ignore(&end), + priority); + + if (!xive2_end_is_precluded_escalation(&end)) { + /* + * The interrupt will be picked up when the + * matching thread lowers its priority level + */ + return; + } + } + } } do_escalation: |