aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-12-21 15:49:59 +0000
committerPeter Maydell <peter.maydell@linaro.org>2018-12-21 15:49:59 +0000
commit891ff9f4a371da2dbd5244590eb35e8d803e18d8 (patch)
tree53894f01afaf3b60e01874dae105d15fd2dd48bb /hw
parent15763776bfc1017adfded6afaebe220bca582923 (diff)
parentb62c6e1237fb5ca2563f7e72b66ac0c40ff7a714 (diff)
downloadqemu-891ff9f4a371da2dbd5244590eb35e8d803e18d8.zip
qemu-891ff9f4a371da2dbd5244590eb35e8d803e18d8.tar.gz
qemu-891ff9f4a371da2dbd5244590eb35e8d803e18d8.tar.bz2
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.0-20181221' into staging
ppc patch queue 2018-12-21 This pull request supersedes the one from 2018-12-13. This is a revised first ppc pull request for qemu-4.0. Highlights are: * Most of the code for the POWER9 "XIVE" interrupt controller (not complete yet, but we're getting there) * A number of g_new vs. g_malloc cleanups * Some IRQ wiring cleanups * A fix for how we advertise NUMA nodes to the guest for pseries # gpg: Signature made Fri 21 Dec 2018 05:34:12 GMT # gpg: using RSA key 6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-4.0-20181221: (40 commits) MAINTAINERS: PPC: add a XIVE section spapr: change default CPU type to POWER9 spapr: introduce an 'ic-mode' machine option spapr: add an extra OV5 field to the sPAPR IRQ backend spapr: add a 'reset' method to the sPAPR IRQ backend spapr: extend the sPAPR IRQ backend for XICS migration spapr: allocate the interrupt thread context under the CPU core spapr: add device tree support for the XIVE exploitation mode spapr: add hcalls support for the XIVE exploitation interrupt mode spapr: introduce a new machine IRQ backend for XIVE spapr-iommu: Always advertise the maximum possible DMA window size spapr/xive: use the VCPU id as a NVT identifier spapr/xive: introduce a XIVE interrupt controller ppc/xive: notify the CPU when the interrupt priority is more privileged ppc/xive: introduce a simplified XIVE presenter ppc/xive: introduce the XIVE interrupt thread context ppc/xive: add support for the END Event State Buffers Changes requirement for "vsubsbs" instruction spapr: export and rename the xics_max_server_number() routine spapr: introduce a spapr_irq_init() routine ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/intc/Makefile.objs2
-rw-r--r--hw/intc/spapr_xive.c1486
-rw-r--r--hw/intc/xics_spapr.c3
-rw-r--r--hw/intc/xive.c1599
-rw-r--r--hw/ppc/e500.c18
-rw-r--r--hw/ppc/mac_newworld.c30
-rw-r--r--hw/ppc/ppc405_boards.c4
-rw-r--r--hw/ppc/ppc405_uc.c4
-rw-r--r--hw/ppc/ppc440_bamboo.c5
-rw-r--r--hw/ppc/sam460ex.c2
-rw-r--r--hw/ppc/spapr.c121
-rw-r--r--hw/ppc/spapr_cpu_core.c4
-rw-r--r--hw/ppc/spapr_iommu.c2
-rw-r--r--hw/ppc/spapr_irq.c194
-rw-r--r--hw/ppc/spapr_rtas_ddw.c19
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/ppc/virtex_ml507.c2
17 files changed, 3393 insertions, 104 deletions
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 0e9963f..301a8e9 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -37,6 +37,8 @@ obj-$(CONFIG_SH4) += sh_intc.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_XIVE) += xive.o
+obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
obj-$(CONFIG_POWERNV) += xics_pnv.o
obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
new file mode 100644
index 0000000..0e39c90
--- /dev/null
+++ b/hw/intc/spapr_xive.c
@@ -0,0 +1,1486 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Virtualization Controller BAR and Thread Managment BAR that we
+ * use for the ESB pages and the TIMA pages
+ */
+#define SPAPR_XIVE_VC_BASE 0x0006010000000000ull
+#define SPAPR_XIVE_TM_BASE 0x0006030203180000ull
+
+/*
+ * The allocation of VP blocks is a complex operation in OPAL and the
+ * VP identifiers have a relation with the number of HW chips, the
+ * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
+ * controller model does not have the same constraints and can use a
+ * simple mapping scheme of the CPU vcpu_id
+ *
+ * These identifiers are never returned to the OS.
+ */
+
+#define SPAPR_XIVE_NVT_BASE 0x400
+
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
+/*
+ * sPAPR NVT and END indexing helpers
+ */
+static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
+{
+ return nvt_idx - SPAPR_XIVE_NVT_BASE;
+}
+
+static void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
+ uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+ assert(cpu);
+
+ if (out_nvt_blk) {
+ *out_nvt_blk = SPAPR_XIVE_BLOCK_ID;
+ }
+
+ if (out_nvt_blk) {
+ *out_nvt_idx = SPAPR_XIVE_NVT_BASE + cpu->vcpu_id;
+ }
+}
+
+static int spapr_xive_target_to_nvt(uint32_t target,
+ uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+ PowerPCCPU *cpu = spapr_find_cpu(target);
+
+ if (!cpu) {
+ return -1;
+ }
+
+ spapr_xive_cpu_to_nvt(cpu, out_nvt_blk, out_nvt_idx);
+ return 0;
+}
+
+/*
+ * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
+ * priorities per CPU
+ */
+static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
+ uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+ assert(cpu);
+
+ if (out_end_blk) {
+ *out_end_blk = SPAPR_XIVE_BLOCK_ID;
+ }
+
+ if (out_end_idx) {
+ *out_end_idx = (cpu->vcpu_id << 3) + prio;
+ }
+}
+
+static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
+ uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+ PowerPCCPU *cpu = spapr_find_cpu(target);
+
+ if (!cpu) {
+ return -1;
+ }
+
+ spapr_xive_cpu_to_end(cpu, prio, out_end_blk, out_end_idx);
+ return 0;
+}
+
+/*
+ * On sPAPR machines, use a simplified output for the XIVE END
+ * structure dumping only the information related to the OS EQ.
+ */
+static void spapr_xive_end_pic_print_info(sPAPRXive *xive, XiveEND *end,
+ Monitor *mon)
+{
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qentries = 1 << (qsize + 10);
+ uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+ monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+ spapr_xive_nvt_to_target(0, nvt),
+ priority, qindex, qentries, qgen);
+
+ xive_end_queue_pic_print_info(end, 6, mon);
+ monitor_printf(mon, "]");
+}
+
+void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
+{
+ XiveSource *xsrc = &xive->source;
+ int i;
+
+ monitor_printf(mon, " LSIN PQ EISN CPU/PRIO EQ\n");
+
+ for (i = 0; i < xive->nr_irqs; i++) {
+ uint8_t pq = xive_source_esb_get(xsrc, i);
+ XiveEAS *eas = &xive->eat[i];
+
+ if (!xive_eas_is_valid(eas)) {
+ continue;
+ }
+
+ monitor_printf(mon, " %08x %s %c%c%c %s %08x ", i,
+ xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+ pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ',
+ xive_eas_is_masked(eas) ? "M" : " ",
+ (int) xive_get_field64(EAS_END_DATA, eas->w));
+
+ if (!xive_eas_is_masked(eas)) {
+ uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ XiveEND *end;
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ if (xive_end_is_valid(end)) {
+ spapr_xive_end_pic_print_info(xive, end, mon);
+ }
+ }
+ monitor_printf(mon, "\n");
+ }
+}
+
+static void spapr_xive_map_mmio(sPAPRXive *xive)
+{
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(xive), 2, xive->tm_base);
+}
+
+/*
+ * When a Virtual Processor is scheduled to run on a HW thread, the
+ * hypervisor pushes its identifier in the OS CAM line. Emulate the
+ * same behavior under QEMU.
+ */
+void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx)
+{
+ uint8_t nvt_blk;
+ uint32_t nvt_idx;
+ uint32_t nvt_cam;
+
+ spapr_xive_cpu_to_nvt(POWERPC_CPU(tctx->cs), &nvt_blk, &nvt_idx);
+
+ nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_nvt_cam_line(nvt_blk, nvt_idx));
+ memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
+}
+
+static void spapr_xive_end_reset(XiveEND *end)
+{
+ memset(end, 0, sizeof(*end));
+
+ /* switch off the escalation and notification ESBs */
+ end->w1 = cpu_to_be32(END_W1_ESe_Q | END_W1_ESn_Q);
+}
+
+static void spapr_xive_reset(void *dev)
+{
+ sPAPRXive *xive = SPAPR_XIVE(dev);
+ int i;
+
+ /*
+ * The XiveSource has its own reset handler, which mask off all
+ * IRQs (!P|Q)
+ */
+
+ /* Mask all valid EASs in the IRQ number space. */
+ for (i = 0; i < xive->nr_irqs; i++) {
+ XiveEAS *eas = &xive->eat[i];
+ if (xive_eas_is_valid(eas)) {
+ eas->w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+ } else {
+ eas->w = 0;
+ }
+ }
+
+ /* Clear all ENDs */
+ for (i = 0; i < xive->nr_ends; i++) {
+ spapr_xive_end_reset(&xive->endt[i]);
+ }
+}
+
+static void spapr_xive_instance_init(Object *obj)
+{
+ sPAPRXive *xive = SPAPR_XIVE(obj);
+
+ object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE);
+ object_property_add_child(obj, "source", OBJECT(&xive->source), NULL);
+
+ object_initialize(&xive->end_source, sizeof(xive->end_source),
+ TYPE_XIVE_END_SOURCE);
+ object_property_add_child(obj, "end_source", OBJECT(&xive->end_source),
+ NULL);
+}
+
+static void spapr_xive_realize(DeviceState *dev, Error **errp)
+{
+ sPAPRXive *xive = SPAPR_XIVE(dev);
+ XiveSource *xsrc = &xive->source;
+ XiveENDSource *end_xsrc = &xive->end_source;
+ Error *local_err = NULL;
+
+ if (!xive->nr_irqs) {
+ error_setg(errp, "Number of interrupt needs to be greater 0");
+ return;
+ }
+
+ if (!xive->nr_ends) {
+ error_setg(errp, "Number of interrupt needs to be greater 0");
+ return;
+ }
+
+ /*
+ * Initialize the internal sources, for IPIs and virtual devices.
+ */
+ object_property_set_int(OBJECT(xsrc), xive->nr_irqs, "nr-irqs",
+ &error_fatal);
+ object_property_add_const_link(OBJECT(xsrc), "xive", OBJECT(xive),
+ &error_fatal);
+ object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /*
+ * Initialize the END ESB source
+ */
+ object_property_set_int(OBJECT(end_xsrc), xive->nr_irqs, "nr-ends",
+ &error_fatal);
+ object_property_add_const_link(OBJECT(end_xsrc), "xive", OBJECT(xive),
+ &error_fatal);
+ object_property_set_bool(OBJECT(end_xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Set the mapping address of the END ESB pages after the source ESBs */
+ xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+ /*
+ * Allocate the routing tables
+ */
+ xive->eat = g_new0(XiveEAS, xive->nr_irqs);
+ xive->endt = g_new0(XiveEND, xive->nr_ends);
+
+ /* TIMA initialization */
+ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+ "xive.tima", 4ull << TM_SHIFT);
+
+ /* Define all XIVE MMIO regions on SysBus */
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
+ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+
+ /* Map all regions */
+ spapr_xive_map_mmio(xive);
+
+ qemu_register_reset(spapr_xive_reset, dev);
+}
+
+static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
+ uint32_t eas_idx, XiveEAS *eas)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (eas_idx >= xive->nr_irqs) {
+ return -1;
+ }
+
+ *eas = xive->eat[eas_idx];
+ return 0;
+}
+
+static int spapr_xive_get_end(XiveRouter *xrtr,
+ uint8_t end_blk, uint32_t end_idx, XiveEND *end)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (end_idx >= xive->nr_ends) {
+ return -1;
+ }
+
+ memcpy(end, &xive->endt[end_idx], sizeof(XiveEND));
+ return 0;
+}
+
+static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk,
+ uint32_t end_idx, XiveEND *end,
+ uint8_t word_number)
+{
+ sPAPRXive *xive = SPAPR_XIVE(xrtr);
+
+ if (end_idx >= xive->nr_ends) {
+ return -1;
+ }
+
+ memcpy(&xive->endt[end_idx], end, sizeof(XiveEND));
+ return 0;
+}
+
+static int spapr_xive_get_nvt(XiveRouter *xrtr,
+ uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
+{
+ uint32_t vcpu_id = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+ PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
+
+ if (!cpu) {
+ /* TODO: should we assert() if we can find a NVT ? */
+ return -1;
+ }
+
+ /*
+ * sPAPR does not maintain a NVT table. Return that the NVT is
+ * valid if we have found a matching CPU
+ */
+ nvt->w0 = cpu_to_be32(NVT_W0_VALID);
+ return 0;
+}
+
+static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk,
+ uint32_t nvt_idx, XiveNVT *nvt,
+ uint8_t word_number)
+{
+ /*
+ * We don't need to write back to the NVTs because the sPAPR
+ * machine should never hit a non-scheduled NVT. It should never
+ * get called.
+ */
+ g_assert_not_reached();
+}
+
+static const VMStateDescription vmstate_spapr_xive_end = {
+ .name = TYPE_SPAPR_XIVE "/end",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(w0, XiveEND),
+ VMSTATE_UINT32(w1, XiveEND),
+ VMSTATE_UINT32(w2, XiveEND),
+ VMSTATE_UINT32(w3, XiveEND),
+ VMSTATE_UINT32(w4, XiveEND),
+ VMSTATE_UINT32(w5, XiveEND),
+ VMSTATE_UINT32(w6, XiveEND),
+ VMSTATE_UINT32(w7, XiveEND),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr_xive_eas = {
+ .name = TYPE_SPAPR_XIVE "/eas",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT64(w, XiveEAS),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr_xive = {
+ .name = TYPE_SPAPR_XIVE,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_EQUAL(nr_irqs, sPAPRXive, NULL),
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, sPAPRXive, nr_irqs,
+ vmstate_spapr_xive_eas, XiveEAS),
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(endt, sPAPRXive, nr_ends,
+ vmstate_spapr_xive_end, XiveEND),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static Property spapr_xive_properties[] = {
+ DEFINE_PROP_UINT32("nr-irqs", sPAPRXive, nr_irqs, 0),
+ DEFINE_PROP_UINT32("nr-ends", sPAPRXive, nr_ends, 0),
+ DEFINE_PROP_UINT64("vc-base", sPAPRXive, vc_base, SPAPR_XIVE_VC_BASE),
+ DEFINE_PROP_UINT64("tm-base", sPAPRXive, tm_base, SPAPR_XIVE_TM_BASE),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_xive_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
+
+ dc->desc = "sPAPR XIVE Interrupt Controller";
+ dc->props = spapr_xive_properties;
+ dc->realize = spapr_xive_realize;
+ dc->vmsd = &vmstate_spapr_xive;
+
+ xrc->get_eas = spapr_xive_get_eas;
+ xrc->get_end = spapr_xive_get_end;
+ xrc->write_end = spapr_xive_write_end;
+ xrc->get_nvt = spapr_xive_get_nvt;
+ xrc->write_nvt = spapr_xive_write_nvt;
+}
+
+static const TypeInfo spapr_xive_info = {
+ .name = TYPE_SPAPR_XIVE,
+ .parent = TYPE_XIVE_ROUTER,
+ .instance_init = spapr_xive_instance_init,
+ .instance_size = sizeof(sPAPRXive),
+ .class_init = spapr_xive_class_init,
+};
+
+static void spapr_xive_register_types(void)
+{
+ type_register_static(&spapr_xive_info);
+}
+
+type_init(spapr_xive_register_types)
+
+bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return false;
+ }
+
+ xive->eat[lisn].w |= cpu_to_be64(EAS_VALID);
+ xive_source_irq_set(xsrc, lisn, lsi);
+ return true;
+}
+
+bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return false;
+ }
+
+ xive->eat[lisn].w &= cpu_to_be64(~EAS_VALID);
+ xive_source_irq_set(xsrc, lisn, false);
+ return true;
+}
+
+qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn)
+{
+ XiveSource *xsrc = &xive->source;
+
+ if (lisn >= xive->nr_irqs) {
+ return NULL;
+ }
+
+ /* The sPAPR machine/device should have claimed the IRQ before */
+ assert(xive_eas_is_valid(&xive->eat[lisn]));
+
+ return xive_source_qirq(xsrc, lisn);
+}
+
+/*
+ * XIVE hcalls
+ *
+ * The terminology used by the XIVE hcalls is the following :
+ *
+ * TARGET vCPU number
+ * EQ Event Queue assigned by OS to receive event data
+ * ESB page for source interrupt management
+ * LISN Logical Interrupt Source Number identifying a source in the
+ * machine
+ * EISN Effective Interrupt Source Number used by guest OS to
+ * identify source in the guest
+ *
+ * The EAS, END, NVT structures are not exposed.
+ */
+
+/*
+ * Linux hosts under OPAL reserve priority 7 for their own escalation
+ * interrupts (DD2.X POWER9). So we only allow the guest to use
+ * priorities [0..6].
+ */
+static bool spapr_xive_priority_is_reserved(uint8_t priority)
+{
+ switch (priority) {
+ case 0 ... 6:
+ return false;
+ case 7: /* OPAL escalation queue */
+ default:
+ return true;
+ }
+}
+
+/*
+ * The H_INT_GET_SOURCE_INFO hcall() is used to obtain the logical
+ * real address of the MMIO page through which the Event State Buffer
+ * entry associated with the value of the "lisn" parameter is managed.
+ *
+ * Parameters:
+ * Input
+ * - R4: "flags"
+ * Bits 0-63 reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as returned
+ * by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output
+ * - R4: "flags"
+ * Bits 0-59: Reserved
+ * Bit 60: H_INT_ESB must be used for Event State Buffer
+ * management
+ * Bit 61: 1 == LSI 0 == MSI
+ * Bit 62: the full function page supports trigger
+ * Bit 63: Store EOI Supported
+ * - R5: Logical Real address of full function Event State Buffer
+ * management page, -1 if H_INT_ESB hcall flag is set to 1.
+ * - R6: Logical Real Address of trigger only Event State Buffer
+ * management page or -1.
+ * - R7: Power of 2 page size for the ESB management pages returned in
+ * R5 and R6.
+ */
+
+#define SPAPR_XIVE_SRC_H_INT_ESB PPC_BIT(60) /* ESB manage with H_INT_ESB */
+#define SPAPR_XIVE_SRC_LSI PPC_BIT(61) /* Virtual LSI type */
+#define SPAPR_XIVE_SRC_TRIGGER PPC_BIT(62) /* Trigger and management
+ on same page */
+#define SPAPR_XIVE_SRC_STORE_EOI PPC_BIT(63) /* Store EOI support */
+
+static target_ulong h_int_get_source_info(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveSource *xsrc = &xive->source;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ if (!xive_eas_is_valid(&xive->eat[lisn])) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /*
+ * All sources are emulated under the main XIVE object and share
+ * the same characteristics.
+ */
+ args[0] = 0;
+ if (!xive_source_esb_has_2page(xsrc)) {
+ args[0] |= SPAPR_XIVE_SRC_TRIGGER;
+ }
+ if (xsrc->esb_flags & XIVE_SRC_STORE_EOI) {
+ args[0] |= SPAPR_XIVE_SRC_STORE_EOI;
+ }
+
+ /*
+ * Force the use of the H_INT_ESB hcall in case of an LSI
+ * interrupt. This is necessary under KVM to re-trigger the
+ * interrupt if the level is still asserted
+ */
+ if (xive_source_irq_is_lsi(xsrc, lisn)) {
+ args[0] |= SPAPR_XIVE_SRC_H_INT_ESB | SPAPR_XIVE_SRC_LSI;
+ }
+
+ if (!(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+ args[1] = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn);
+ } else {
+ args[1] = -1;
+ }
+
+ if (xive_source_esb_has_2page(xsrc) &&
+ !(args[0] & SPAPR_XIVE_SRC_H_INT_ESB)) {
+ args[2] = xive->vc_base + xive_source_esb_page(xsrc, lisn);
+ } else {
+ args[2] = -1;
+ }
+
+ if (xive_source_esb_has_2page(xsrc)) {
+ args[3] = xsrc->esb_shift - 1;
+ } else {
+ args[3] = xsrc->esb_shift;
+ }
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_SOURCE_CONFIG hcall() is used to assign a Logical
+ * Interrupt Source to a target. The Logical Interrupt Source is
+ * designated with the "lisn" parameter and the target is designated
+ * with the "target" and "priority" parameters. Upon return from the
+ * hcall(), no additional interrupts will be directed to the old EQ.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-61: Reserved
+ * Bit 62: set the "eisn" in the EAS
+ * Bit 63: masks the interrupt source in the hardware interrupt
+ * control structure. An interrupt masked by this mechanism will
+ * be dropped, but it's source state bits will still be
+ * set. There is no race-free way of unmasking and restoring the
+ * source. Thus this should only be used in interrupts that are
+ * also masked at the source, and only in cases where the
+ * interrupt is not meant to be used for a large amount of time
+ * because no valid target exists for it for example
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as returned by
+ * the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R7: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ * - R8: "eisn" is the guest EISN associated with the "lisn"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_SRC_SET_EISN PPC_BIT(62)
+#define SPAPR_XIVE_SRC_MASK PPC_BIT(63)
+
+static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas, new_eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ target_ulong target = args[2];
+ target_ulong priority = args[3];
+ target_ulong eisn = args[4];
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~(SPAPR_XIVE_SRC_SET_EISN | SPAPR_XIVE_SRC_MASK)) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /* priority 0xff is used to reset the EAS */
+ if (priority == 0xff) {
+ new_eas.w = cpu_to_be64(EAS_VALID | EAS_MASKED);
+ goto out;
+ }
+
+ if (flags & SPAPR_XIVE_SRC_MASK) {
+ new_eas.w = eas.w | cpu_to_be64(EAS_MASKED);
+ } else {
+ new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED);
+ }
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P4;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P3;
+ }
+
+ new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk);
+ new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx);
+
+ if (flags & SPAPR_XIVE_SRC_SET_EISN) {
+ new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
+ }
+
+out:
+ xive->eat[lisn] = new_eas;
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_SOURCE_CONFIG hcall() is used to determine to which
+ * target/priority pair is assigned to the specified Logical Interrupt
+ * Source.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63 Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - R4: Target to which the specified Logical Interrupt Source is
+ * assigned
+ * - R5: Priority to which the specified Logical Interrupt Source is
+ * assigned
+ * - R6: EISN for the specified Logical Interrupt Source (this will be
+ * equivalent to the LISN if not changed by H_INT_SET_SOURCE_CONFIG)
+ */
+static target_ulong h_int_get_source_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ XiveEAS eas;
+ XiveEND *end;
+ uint8_t nvt_blk;
+ uint32_t end_idx, nvt_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /* EAS_END_BLOCK is unused on sPAPR */
+ end_idx = xive_get_field64(EAS_END_INDEX, eas.w);
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
+ nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ args[0] = spapr_xive_nvt_to_target(nvt_blk, nvt_idx);
+
+ if (xive_eas_is_masked(&eas)) {
+ args[1] = 0xff;
+ } else {
+ args[1] = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+ }
+
+ args[2] = xive_get_field64(EAS_END_DATA, eas.w);
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_QUEUE_INFO hcall() is used to get the logical real
+ * address of the notification management page associated with the
+ * specified target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63 Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: Logical real address of notification page
+ * - R5: Power of 2 page size of the notification page
+ */
+static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveENDSource *end_xsrc = &xive->end_source;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ XiveEND *end;
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx;
+ if (xive_end_is_enqueue(end)) {
+ args[1] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+ } else {
+ args[1] = 0;
+ }
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_QUEUE_CONFIG hcall() is used to set or reset a EQ for
+ * a given "target" and "priority". It is also used to set the
+ * notification config associated with the EQ. An EQ size of 0 is
+ * used to reset the EQ config for a given target and priority. If
+ * resetting the EQ config, the END associated with the given "target"
+ * and "priority" will be changed to disable queueing.
+ *
+ * Upon return from the hcall(), no additional interrupts will be
+ * directed to the old EQ (if one was set). The old EQ (if one was
+ * set) should be investigated for interrupts that occurred prior to
+ * or during the hcall().
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * Bit 63: Unconditional Notify (n) per the XIVE spec
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ * - R7: "eventQueue": The logical real address of the start of the EQ
+ * - R8: "eventQueueSize": The power of 2 EQ size per "ibm,xive-eq-sizes"
+ *
+ * Output:
+ * - None
+ */
+
+#define SPAPR_XIVE_END_ALWAYS_NOTIFY PPC_BIT(63)
+
+static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ target_ulong qpage = args[3];
+ target_ulong qsize = args[4];
+ XiveEND end;
+ uint8_t end_blk, nvt_blk;
+ uint32_t end_idx, nvt_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND));
+
+ switch (qsize) {
+ case 12:
+ case 16:
+ case 21:
+ case 24:
+ end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
+ end.w3 = cpu_to_be32(qpage & 0xffffffff);
+ end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
+ end.w0 = xive_set_field32(END_W0_QSIZE, end.w0, qsize - 12);
+ break;
+ case 0:
+ /* reset queue and disable queueing */
+ spapr_xive_end_reset(&end);
+ goto out;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EQ size %"PRIx64"\n",
+ qsize);
+ return H_P5;
+ }
+
+ if (qsize) {
+ hwaddr plen = 1 << qsize;
+ void *eq;
+
+ /*
+ * Validate the guest EQ. We should also check that the queue
+ * has been zeroed by the OS.
+ */
+ eq = address_space_map(CPU(cpu)->as, qpage, &plen, true,
+ MEMTXATTRS_UNSPECIFIED);
+ if (plen != 1 << qsize) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to map EQ @0x%"
+ HWADDR_PRIx "\n", qpage);
+ return H_P4;
+ }
+ address_space_unmap(CPU(cpu)->as, eq, plen, true, plen);
+ }
+
+ /* "target" should have been validated above */
+ if (spapr_xive_target_to_nvt(target, &nvt_blk, &nvt_idx)) {
+ g_assert_not_reached();
+ }
+
+ /*
+ * Ensure the priority and target are correctly set (they will not
+ * be right after allocation)
+ */
+ end.w6 = xive_set_field32(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
+ xive_set_field32(END_W6_NVT_INDEX, 0ul, nvt_idx);
+ end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0ul, priority);
+
+ if (flags & SPAPR_XIVE_END_ALWAYS_NOTIFY) {
+ end.w0 |= cpu_to_be32(END_W0_UCOND_NOTIFY);
+ } else {
+ end.w0 &= cpu_to_be32((uint32_t)~END_W0_UCOND_NOTIFY);
+ }
+
+ /*
+ * The generation bit for the END starts at 1 and The END page
+ * offset counter starts at 0.
+ */
+ end.w1 = cpu_to_be32(END_W1_GENERATION) |
+ xive_set_field32(END_W1_PAGE_OFF, 0ul, 0ul);
+ end.w0 |= cpu_to_be32(END_W0_VALID);
+
+ /*
+ * TODO: issue syncs required to ensure all in-flight interrupts
+ * are complete on the old END
+ */
+
+out:
+ /* Update END */
+ memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_GET_QUEUE_CONFIG hcall() is used to get a EQ for a given
+ * target and priority.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * Bit 63: Debug: Return debug data
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "priority" is a valid priority not in
+ * "ibm,plat-res-int-priorities"
+ *
+ * Output:
+ * - R4: "flags":
+ * Bits 0-61: Reserved
+ * Bit 62: The value of Event Queue Generation Number (g) per
+ * the XIVE spec if "Debug" = 1
+ * Bit 63: The value of Unconditional Notify (n) per the XIVE spec
+ * - R5: The logical real address of the start of the EQ
+ * - R6: The power of 2 EQ size per "ibm,xive-eq-sizes"
+ * - R7: The value of Event Queue Offset Counter per XIVE spec
+ * if "Debug" = 1, else 0
+ *
+ */
+
+#define SPAPR_XIVE_END_DEBUG PPC_BIT(63)
+
+static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+ target_ulong target = args[1];
+ target_ulong priority = args[2];
+ XiveEND *end;
+ uint8_t end_blk;
+ uint32_t end_idx;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_END_DEBUG) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ if (spapr_xive_priority_is_reserved(priority)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
+ " is reserved\n", priority);
+ return H_P3;
+ }
+
+ /*
+ * Validate that "target" is part of the list of threads allocated
+ * to the partition. For that, find the END corresponding to the
+ * target.
+ */
+ if (spapr_xive_target_to_end(target, priority, &end_blk, &end_idx)) {
+ return H_P2;
+ }
+
+ assert(end_idx < xive->nr_ends);
+ end = &xive->endt[end_idx];
+
+ args[0] = 0;
+ if (xive_end_is_notify(end)) {
+ args[0] |= SPAPR_XIVE_END_ALWAYS_NOTIFY;
+ }
+
+ if (xive_end_is_enqueue(end)) {
+ args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+ } else {
+ args[1] = 0;
+ args[2] = 0;
+ }
+
+ /* TODO: do we need any locking on the END ? */
+ if (flags & SPAPR_XIVE_END_DEBUG) {
+ /* Load the event queue generation number into the return flags */
+ args[0] |= (uint64_t)xive_get_field32(END_W1_GENERATION, end->w1) << 62;
+
+ /* Load R7 with the event queue offset counter */
+ args[3] = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ } else {
+ args[3] = 0;
+ }
+
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SET_OS_REPORTING_LINE hcall() is used to set the
+ * reporting cache line pair for the calling thread. The reporting
+ * cache lines will contain the OS interrupt context when the OS
+ * issues a CI store byte to @TIMA+0xC10 to acknowledge the OS
+ * interrupt. The reporting cache lines can be reset by inputting -1
+ * in "reportingLine". Issuing the CI store byte without reporting
+ * cache lines registered will result in the data not being accessible
+ * to the OS.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "reportingLine": The logical real address of the reporting cache
+ * line pair
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_set_os_reporting_line(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* TODO: H_INT_SET_OS_REPORTING_LINE */
+ return H_FUNCTION;
+}
+
+/*
+ * The H_INT_GET_OS_REPORTING_LINE hcall() is used to get the logical
+ * real address of the reporting cache line pair set for the input
+ * "target". If no reporting cache line pair has been set, -1 is
+ * returned.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "target" is per "ibm,ppc-interrupt-server#s" or
+ * "ibm,ppc-interrupt-gserver#s"
+ * - R6: "reportingLine": The logical real address of the reporting
+ * cache line pair
+ *
+ * Output:
+ * - R4: The logical real address of the reporting line if set, else -1
+ */
+static target_ulong h_int_get_os_reporting_line(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* TODO: H_INT_GET_OS_REPORTING_LINE */
+ return H_FUNCTION;
+}
+
+/*
+ * The H_INT_ESB hcall() is used to issue a load or store to the ESB
+ * page for the input "lisn". This hcall is only supported for LISNs
+ * that have the ESB hcall flag set to 1 when returned from hcall()
+ * H_INT_GET_SOURCE_INFO.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-62: Reserved
+ * bit 63: Store: Store=1, store operation, else load operation
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ * - R6: "esbOffset" is the offset into the ESB page for the load or
+ * store operation
+ * - R7: "storeData" is the data to write for a store operation
+ *
+ * Output:
+ * - R4: The value of the load if load operation, else -1
+ */
+
+#define SPAPR_XIVE_ESB_STORE PPC_BIT(63)
+
+static target_ulong h_int_esb(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+ target_ulong offset = args[2];
+ target_ulong data = args[3];
+ hwaddr mmio_addr;
+ XiveSource *xsrc = &xive->source;
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags & ~SPAPR_XIVE_ESB_STORE) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ if (offset > (1ull << xsrc->esb_shift)) {
+ return H_P3;
+ }
+
+ mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+
+ if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+ (flags & SPAPR_XIVE_ESB_STORE))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+ HWADDR_PRIx "\n", mmio_addr);
+ return H_HARDWARE;
+ }
+ args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_SYNC hcall() is used to issue hardware syncs that will
+ * ensure any in flight events for the input lisn are in the event
+ * queue.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ * - R5: "lisn" is per "interrupts", "interrupt-map", or
+ * "ibm,xive-lisn-ranges" properties, or as returned by the
+ * ibm,query-interrupt-source-number RTAS call, or as
+ * returned by the H_ALLOCATE_VAS_WINDOW hcall
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_sync(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ XiveEAS eas;
+ target_ulong flags = args[0];
+ target_ulong lisn = args[1];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ if (lisn >= xive->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ eas = xive->eat[lisn];
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid LISN " TARGET_FMT_lx "\n",
+ lisn);
+ return H_P2;
+ }
+
+ /*
+ * H_STATE should be returned if a H_INT_RESET is in progress.
+ * This is not needed when running the emulation under QEMU
+ */
+
+ /* This is not real hardware. Nothing to be done */
+ return H_SUCCESS;
+}
+
+/*
+ * The H_INT_RESET hcall() is used to reset all of the partition's
+ * interrupt exploitation structures to their initial state. This
+ * means losing all previously set interrupt state set via
+ * H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG.
+ *
+ * Parameters:
+ * Input:
+ * - R4: "flags"
+ * Bits 0-63: Reserved
+ *
+ * Output:
+ * - None
+ */
+static target_ulong h_int_reset(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ sPAPRXive *xive = spapr->xive;
+ target_ulong flags = args[0];
+
+ if (!spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ return H_FUNCTION;
+ }
+
+ if (flags) {
+ return H_PARAMETER;
+ }
+
+ device_reset(DEVICE(xive));
+ return H_SUCCESS;
+}
+
+void spapr_xive_hcall_init(sPAPRMachineState *spapr)
+{
+ spapr_register_hypercall(H_INT_GET_SOURCE_INFO, h_int_get_source_info);
+ spapr_register_hypercall(H_INT_SET_SOURCE_CONFIG, h_int_set_source_config);
+ spapr_register_hypercall(H_INT_GET_SOURCE_CONFIG, h_int_get_source_config);
+ spapr_register_hypercall(H_INT_GET_QUEUE_INFO, h_int_get_queue_info);
+ spapr_register_hypercall(H_INT_SET_QUEUE_CONFIG, h_int_set_queue_config);
+ spapr_register_hypercall(H_INT_GET_QUEUE_CONFIG, h_int_get_queue_config);
+ spapr_register_hypercall(H_INT_SET_OS_REPORTING_LINE,
+ h_int_set_os_reporting_line);
+ spapr_register_hypercall(H_INT_GET_OS_REPORTING_LINE,
+ h_int_get_os_reporting_line);
+ spapr_register_hypercall(H_INT_ESB, h_int_esb);
+ spapr_register_hypercall(H_INT_SYNC, h_int_sync);
+ spapr_register_hypercall(H_INT_RESET, h_int_reset);
+}
+
+void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+ uint32_t phandle)
+{
+ sPAPRXive *xive = spapr->xive;
+ int node;
+ uint64_t timas[2 * 2];
+ /* Interrupt number ranges for the IPIs */
+ uint32_t lisn_ranges[] = {
+ cpu_to_be32(0),
+ cpu_to_be32(nr_servers),
+ };
+ /*
+ * EQ size - the sizes of pages supported by the system 4K, 64K,
+ * 2M, 16M. We only advertise 64K for the moment.
+ */
+ uint32_t eq_sizes[] = {
+ cpu_to_be32(16), /* 64K */
+ };
+ /*
+ * The following array is in sync with the reserved priorities
+ * defined by the 'spapr_xive_priority_is_reserved' routine.
+ */
+ uint32_t plat_res_int_priorities[] = {
+ cpu_to_be32(7), /* start */
+ cpu_to_be32(0xf8), /* count */
+ };
+ gchar *nodename;
+
+ /* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */
+ timas[0] = cpu_to_be64(xive->tm_base +
+ XIVE_TM_USER_PAGE * (1ull << TM_SHIFT));
+ timas[1] = cpu_to_be64(1ull << TM_SHIFT);
+ timas[2] = cpu_to_be64(xive->tm_base +
+ XIVE_TM_OS_PAGE * (1ull << TM_SHIFT));
+ timas[3] = cpu_to_be64(1ull << TM_SHIFT);
+
+ nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+ xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+ _FDT(node = fdt_add_subnode(fdt, 0, nodename));
+ g_free(nodename);
+
+ _FDT(fdt_setprop_string(fdt, node, "device_type", "power-ivpe"));
+ _FDT(fdt_setprop(fdt, node, "reg", timas, sizeof(timas)));
+
+ _FDT(fdt_setprop_string(fdt, node, "compatible", "ibm,power-ivpe"));
+ _FDT(fdt_setprop(fdt, node, "ibm,xive-eq-sizes", eq_sizes,
+ sizeof(eq_sizes)));
+ _FDT(fdt_setprop(fdt, node, "ibm,xive-lisn-ranges", lisn_ranges,
+ sizeof(lisn_ranges)));
+
+ /* For Linux to link the LSIs to the interrupt controller. */
+ _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0));
+ _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2));
+
+ /* For SLOF */
+ _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
+ _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
+
+ /*
+ * The "ibm,plat-res-int-priorities" property defines the priority
+ * ranges reserved by the hypervisor
+ */
+ _FDT(fdt_setprop(fdt, 0, "ibm,plat-res-int-priorities",
+ plat_res_int_priorities, sizeof(plat_res_int_priorities)));
+}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e27b92..f67d3c8 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -244,7 +244,8 @@ void xics_spapr_init(sPAPRMachineState *spapr)
spapr_register_hypercall(H_IPOLL, h_ipoll);
}
-void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
+void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt,
+ uint32_t phandle)
{
uint32_t interrupt_server_ranges_prop[] = {
0, cpu_to_be32(nr_servers),
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
new file mode 100644
index 0000000..ea33494
--- /dev/null
+++ b/hw/intc/xive.c
@@ -0,0 +1,1599 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/dma.h"
+#include "hw/qdev-properties.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive_regs.h"
+
+/*
+ * XIVE Thread Interrupt Management context
+ */
+
+/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static uint8_t priority_to_ipb(uint8_t priority)
+{
+ return priority > XIVE_PRIORITY_MAX ?
+ 0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/*
+ * Convert an Interrupt Pending Buffer (IPB) register to a Pending
+ * Interrupt Priority Register (PIPR), which contains the priority of
+ * the most favored pending notification.
+ */
+static uint8_t ipb_to_pipr(uint8_t ibp)
+{
+ return ibp ? clz32((uint32_t)ibp << 24) : 0xff;
+}
+
+static void ipb_update(uint8_t *regs, uint8_t priority)
+{
+ regs[TM_IPB] |= priority_to_ipb(priority);
+ regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+}
+
+static uint8_t exception_mask(uint8_t ring)
+{
+ switch (ring) {
+ case TM_QW1_OS:
+ return TM_QW1_NSR_EO;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *regs = &tctx->regs[ring];
+ uint8_t nsr = regs[TM_NSR];
+ uint8_t mask = exception_mask(ring);
+
+ qemu_irq_lower(tctx->output);
+
+ if (regs[TM_NSR] & mask) {
+ uint8_t cppr = regs[TM_PIPR];
+
+ regs[TM_CPPR] = cppr;
+
+ /* Reset the pending buffer bit */
+ regs[TM_IPB] &= ~priority_to_ipb(cppr);
+ regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
+
+ /* Drop Exception bit */
+ regs[TM_NSR] &= ~mask;
+ }
+
+ return (nsr << 8) | regs[TM_CPPR];
+}
+
+static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *regs = &tctx->regs[ring];
+
+ if (regs[TM_PIPR] < regs[TM_CPPR]) {
+ regs[TM_NSR] |= exception_mask(ring);
+ qemu_irq_raise(tctx->output);
+ }
+}
+
+static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
+{
+ if (cppr > XIVE_PRIORITY_MAX) {
+ cppr = 0xff;
+ }
+
+ tctx->regs[ring + TM_CPPR] = cppr;
+
+ /* CPPR has changed, check if we need to raise a pending exception */
+ xive_tctx_notify(tctx, ring);
+}
+
+/*
+ * XIVE Thread Interrupt Management Area (TIMA)
+ */
+
+/*
+ * Define an access map for each page of the TIMA that we will use in
+ * the memory region ops to filter values when doing loads and stores
+ * of raw registers values
+ *
+ * Registers accessibility bits :
+ *
+ * 0x0 - no access
+ * 0x1 - write only
+ * 0x2 - read only
+ * 0x3 - read/write
+ */
+
+static const uint8_t xive_tm_hw_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0,
+};
+
+static const uint8_t xive_tm_hv_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_os_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0,
+ /* QW-1 OS */ 2, 3, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const uint8_t xive_tm_user_view[] = {
+ /* QW-0 User */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-1 OS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-2 POOL */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* QW-3 PHYS */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/*
+ * Overall TIMA access map for the thread interrupt management context
+ * registers
+ */
+static const uint8_t *xive_tm_views[] = {
+ [XIVE_TM_HW_PAGE] = xive_tm_hw_view,
+ [XIVE_TM_HV_PAGE] = xive_tm_hv_view,
+ [XIVE_TM_OS_PAGE] = xive_tm_os_view,
+ [XIVE_TM_USER_PAGE] = xive_tm_user_view,
+};
+
+/*
+ * Computes a register access mask for a given offset in the TIMA
+ */
+static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
+{
+ uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+ uint8_t reg_offset = offset & 0x3F;
+ uint8_t reg_mask = write ? 0x1 : 0x2;
+ uint64_t mask = 0x0;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
+ mask |= (uint64_t) 0xff << (8 * (size - i - 1));
+ }
+ }
+
+ return mask;
+}
+
+static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ uint8_t ring_offset = offset & 0x30;
+ uint8_t reg_offset = offset & 0x3F;
+ uint64_t mask = xive_tm_mask(offset, size, true);
+ int i;
+
+ /*
+ * Only 4 or 8 bytes stores are allowed and the User ring is
+ * excluded
+ */
+ if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
+ HWADDR_PRIx"\n", offset);
+ return;
+ }
+
+ /*
+ * Use the register offset for the raw values and filter out
+ * reserved values
+ */
+ for (i = 0; i < size; i++) {
+ uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
+ if (byte_mask) {
+ tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
+ byte_mask;
+ }
+ }
+}
+
+static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+ uint8_t ring_offset = offset & 0x30;
+ uint8_t reg_offset = offset & 0x3F;
+ uint64_t mask = xive_tm_mask(offset, size, false);
+ uint64_t ret;
+ int i;
+
+ /*
+ * Only 4 or 8 bytes loads are allowed and the User ring is
+ * excluded
+ */
+ if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
+ HWADDR_PRIx"\n", offset);
+ return -1;
+ }
+
+ /* Use the register offset for the raw values */
+ ret = 0;
+ for (i = 0; i < size; i++) {
+ ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
+ }
+
+ /* filter out reserved values */
+ return ret & mask;
+}
+
+/*
+ * The TM context is mapped twice within each page. Stores and loads
+ * to the first mapping below 2K write and read the specified values
+ * without modification. The second mapping above 2K performs specific
+ * state changes (side effects) in addition to setting/returning the
+ * interrupt management area context of the processor thread.
+ */
+static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
+{
+ return xive_tctx_accept(tctx, TM_QW1_OS);
+}
+
+static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
+}
+
+/*
+ * Adjust the IPB to allow a CPU to process event queues of other
+ * priorities during one physical interrupt cycle.
+ */
+static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
+ xive_tctx_notify(tctx, TM_QW1_OS);
+}
+
+/*
+ * Define a mapping of "special" operations depending on the TIMA page
+ * offset and the size of the operation.
+ */
+typedef struct XiveTmOp {
+ uint8_t page_offset;
+ uint32_t op_offset;
+ unsigned size;
+ void (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
+ unsigned size);
+ uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
+} XiveTmOp;
+
+static const XiveTmOp xive_tm_operations[] = {
+ /*
+ * MMIOs below 2K : raw values and special operations without side
+ * effects
+ */
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL },
+
+ /* MMIOs above 2K : special operations with side effects */
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg },
+ { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
+};
+
+static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
+{
+ uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
+ uint32_t op_offset = offset & 0xFFF;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
+ const XiveTmOp *xto = &xive_tm_operations[i];
+
+ /* Accesses done from a more privileged TIMA page is allowed */
+ if (xto->page_offset >= page_offset &&
+ xto->op_offset == op_offset &&
+ xto->size == size &&
+ ((write && xto->write_handler) || (!write && xto->read_handler))) {
+ return xto;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * TIMA MMIO handlers
+ */
+static void xive_tm_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ const XiveTmOp *xto;
+
+ /*
+ * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+ */
+
+ /*
+ * First, check for special operations in the 2K region
+ */
+ if (offset & 0x800) {
+ xto = xive_tm_find_op(offset, size, true);
+ if (!xto) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
+ "@%"HWADDR_PRIx"\n", offset);
+ } else {
+ xto->write_handler(tctx, offset, value, size);
+ }
+ return;
+ }
+
+ /*
+ * Then, for special operations in the region below 2K.
+ */
+ xto = xive_tm_find_op(offset, size, true);
+ if (xto) {
+ xto->write_handler(tctx, offset, value, size);
+ return;
+ }
+
+ /*
+ * Finish with raw access to the register values
+ */
+ xive_tm_raw_write(tctx, offset, value, size);
+}
+
+static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ const XiveTmOp *xto;
+
+ /*
+ * TODO: check V bit in Q[0-3]W2, check PTER bit associated with CPU
+ */
+
+ /*
+ * First, check for special operations in the 2K region
+ */
+ if (offset & 0x800) {
+ xto = xive_tm_find_op(offset, size, false);
+ if (!xto) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
+ "@%"HWADDR_PRIx"\n", offset);
+ return -1;
+ }
+ return xto->read_handler(tctx, offset, size);
+ }
+
+ /*
+ * Then, for special operations in the region below 2K.
+ */
+ xto = xive_tm_find_op(offset, size, false);
+ if (xto) {
+ return xto->read_handler(tctx, offset, size);
+ }
+
+ /*
+ * Finish with raw access to the register values
+ */
+ return xive_tm_raw_read(tctx, offset, size);
+}
+
+const MemoryRegionOps xive_tm_ops = {
+ .read = xive_tm_read,
+ .write = xive_tm_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+ return *((uint32_t *) &ring[TM_WORD2]);
+}
+
+static char *xive_tctx_ring_print(uint8_t *ring)
+{
+ uint32_t w2 = xive_tctx_word2(ring);
+
+ return g_strdup_printf("%02x %02x %02x %02x %02x "
+ "%02x %02x %02x %08x",
+ ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
+ ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
+ be32_to_cpu(w2));
+}
+
+static const char * const xive_tctx_ring_names[] = {
+ "USER", "OS", "POOL", "PHYS",
+};
+
+void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
+{
+ int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
+ int i;
+
+ monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
+ " W2\n", cpu_index);
+
+ for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
+ char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
+ monitor_printf(mon, "CPU[%04x]: %4s %s\n", cpu_index,
+ xive_tctx_ring_names[i], s);
+ g_free(s);
+ }
+}
+
+static void xive_tctx_reset(void *dev)
+{
+ XiveTCTX *tctx = XIVE_TCTX(dev);
+
+ memset(tctx->regs, 0, sizeof(tctx->regs));
+
+ /* Set some defaults */
+ tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
+ tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
+ tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
+
+ /*
+ * Initialize PIPR to 0xFF to avoid phantom interrupts when the
+ * CPPR is first set.
+ */
+ tctx->regs[TM_QW1_OS + TM_PIPR] =
+ ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
+}
+
+static void xive_tctx_realize(DeviceState *dev, Error **errp)
+{
+ XiveTCTX *tctx = XIVE_TCTX(dev);
+ PowerPCCPU *cpu;
+ CPUPPCState *env;
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'cpu' not found: ");
+ return;
+ }
+
+ cpu = POWERPC_CPU(obj);
+ tctx->cs = CPU(obj);
+
+ env = &cpu->env;
+ switch (PPC_INPUT(env)) {
+ case PPC_FLAGS_INPUT_POWER7:
+ tctx->output = env->irq_inputs[POWER7_INPUT_INT];
+ break;
+
+ default:
+ error_setg(errp, "XIVE interrupt controller does not support "
+ "this CPU bus model");
+ return;
+ }
+
+ qemu_register_reset(xive_tctx_reset, dev);
+}
+
+static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
+{
+ qemu_unregister_reset(xive_tctx_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_tctx = {
+ .name = TYPE_XIVE_TCTX,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_BUFFER(regs, XiveTCTX),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void xive_tctx_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE Interrupt Thread Context";
+ dc->realize = xive_tctx_realize;
+ dc->unrealize = xive_tctx_unrealize;
+ dc->vmsd = &vmstate_xive_tctx;
+}
+
+static const TypeInfo xive_tctx_info = {
+ .name = TYPE_XIVE_TCTX,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveTCTX),
+ .class_init = xive_tctx_class_init,
+};
+
+Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp)
+{
+ Error *local_err = NULL;
+ Object *obj;
+
+ obj = object_new(TYPE_XIVE_TCTX);
+ object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort);
+ object_unref(obj);
+ object_property_add_const_link(obj, "cpu", cpu, &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ goto error;
+ }
+
+ return obj;
+
+error:
+ object_unparent(obj);
+ error_propagate(errp, local_err);
+ return NULL;
+}
+
+/*
+ * XIVE ESB helpers
+ */
+
+static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ *pq &= ~0x3;
+ *pq |= value & 0x3;
+
+ return old_pq;
+}
+
+static bool xive_esb_trigger(uint8_t *pq)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ xive_esb_set(pq, XIVE_ESB_PENDING);
+ return true;
+ case XIVE_ESB_PENDING:
+ case XIVE_ESB_QUEUED:
+ xive_esb_set(pq, XIVE_ESB_QUEUED);
+ return false;
+ case XIVE_ESB_OFF:
+ xive_esb_set(pq, XIVE_ESB_OFF);
+ return false;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool xive_esb_eoi(uint8_t *pq)
+{
+ uint8_t old_pq = *pq & 0x3;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ case XIVE_ESB_PENDING:
+ xive_esb_set(pq, XIVE_ESB_RESET);
+ return false;
+ case XIVE_ESB_QUEUED:
+ xive_esb_set(pq, XIVE_ESB_PENDING);
+ return true;
+ case XIVE_ESB_OFF:
+ xive_esb_set(pq, XIVE_ESB_OFF);
+ return false;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * XIVE Interrupt Source (or IVSE)
+ */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
+{
+ assert(srcno < xsrc->nr_irqs);
+
+ return xsrc->status[srcno] & 0x3;
+}
+
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
+{
+ assert(srcno < xsrc->nr_irqs);
+
+ return xive_esb_set(&xsrc->status[srcno], pq);
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+ uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
+
+ xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+
+ switch (old_pq) {
+ case XIVE_ESB_RESET:
+ xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+ bool ret;
+
+ assert(srcno < xsrc->nr_irqs);
+
+ ret = xive_esb_trigger(&xsrc->status[srcno]);
+
+ if (xive_source_irq_is_lsi(xsrc, srcno) &&
+ xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: queued an event on LSI IRQ %d\n", srcno);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns whether the event notification should be forwarded.
+ */
+static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
+{
+ bool ret;
+
+ assert(srcno < xsrc->nr_irqs);
+
+ ret = xive_esb_eoi(&xsrc->status[srcno]);
+
+ /*
+ * LSI sources do not set the Q bit but they can still be
+ * asserted, in which case we should forward a new event
+ * notification
+ */
+ if (xive_source_irq_is_lsi(xsrc, srcno) &&
+ xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+ ret = xive_source_lsi_trigger(xsrc, srcno);
+ }
+
+ return ret;
+}
+
+/*
+ * Forward the source event notification to the Router
+ */
+static void xive_source_notify(XiveSource *xsrc, int srcno)
+{
+ XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
+
+ if (xnc->notify) {
+ xnc->notify(xsrc->xive, srcno);
+ }
+}
+
+/*
+ * In a two pages ESB MMIO setting, even page is the trigger page, odd
+ * page is for management
+ */
+static inline bool addr_is_even(hwaddr addr, uint32_t shift)
+{
+ return !((addr >> shift) & 1);
+}
+
+static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
+{
+ return xive_source_esb_has_2page(xsrc) &&
+ addr_is_even(addr, xsrc->esb_shift - 1);
+}
+
+/*
+ * ESB MMIO loads
+ * Trigger page Management/EOI page
+ *
+ * ESB MMIO setting 2 pages 1 or 2 pages
+ *
+ * 0x000 .. 0x3FF -1 EOI and return 0|1
+ * 0x400 .. 0x7FF -1 EOI and return 0|1
+ * 0x800 .. 0xBFF -1 return PQ
+ * 0xC00 .. 0xCFF -1 return PQ and atomically PQ=00
+ * 0xD00 .. 0xDFF -1 return PQ and atomically PQ=01
+ * 0xE00 .. 0xDFF -1 return PQ and atomically PQ=10
+ * 0xF00 .. 0xDFF -1 return PQ and atomically PQ=11
+ */
+static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint32_t srcno = addr >> xsrc->esb_shift;
+ uint64_t ret = -1;
+
+ /* In a two pages ESB MMIO setting, trigger page should not be read */
+ if (xive_source_is_trigger_page(xsrc, addr)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: invalid load on IRQ %d trigger page at "
+ "0x%"HWADDR_PRIx"\n", srcno, addr);
+ return -1;
+ }
+
+ switch (offset) {
+ case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+ ret = xive_source_esb_eoi(xsrc, srcno);
+
+ /* Forward the source event notification for routing */
+ if (ret) {
+ xive_source_notify(xsrc, srcno);
+ }
+ break;
+
+ case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+ ret = xive_source_esb_get(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
+ offset);
+ }
+
+ return ret;
+}
+
+/*
+ * ESB MMIO stores
+ * Trigger page Management/EOI page
+ *
+ * ESB MMIO setting 2 pages 1 or 2 pages
+ *
+ * 0x000 .. 0x3FF Trigger Trigger
+ * 0x400 .. 0x7FF Trigger EOI
+ * 0x800 .. 0xBFF Trigger undefined
+ * 0xC00 .. 0xCFF Trigger PQ=00
+ * 0xD00 .. 0xDFF Trigger PQ=01
+ * 0xE00 .. 0xDFF Trigger PQ=10
+ * 0xF00 .. 0xDFF Trigger PQ=11
+ */
+static void xive_source_esb_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint32_t srcno = addr >> xsrc->esb_shift;
+ bool notify = false;
+
+ /* In a two pages ESB MMIO setting, trigger page only triggers */
+ if (xive_source_is_trigger_page(xsrc, addr)) {
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ goto out;
+ }
+
+ switch (offset) {
+ case 0 ... 0x3FF:
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
+ if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: invalid Store EOI for IRQ %d\n", srcno);
+ return;
+ }
+
+ notify = xive_source_esb_eoi(xsrc, srcno);
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
+ offset);
+ return;
+ }
+
+out:
+ /* Forward the source event notification for routing */
+ if (notify) {
+ xive_source_notify(xsrc, srcno);
+ }
+}
+
+static const MemoryRegionOps xive_source_esb_ops = {
+ .read = xive_source_esb_read,
+ .write = xive_source_esb_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+static void xive_source_set_irq(void *opaque, int srcno, int val)
+{
+ XiveSource *xsrc = XIVE_SOURCE(opaque);
+ bool notify = false;
+
+ if (xive_source_irq_is_lsi(xsrc, srcno)) {
+ if (val) {
+ notify = xive_source_lsi_trigger(xsrc, srcno);
+ } else {
+ xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+ }
+ } else {
+ if (val) {
+ notify = xive_source_esb_trigger(xsrc, srcno);
+ }
+ }
+
+ /* Forward the source event notification for routing */
+ if (notify) {
+ xive_source_notify(xsrc, srcno);
+ }
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
+{
+ int i;
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ uint8_t pq = xive_source_esb_get(xsrc, i);
+
+ if (pq == XIVE_ESB_OFF) {
+ continue;
+ }
+
+ monitor_printf(mon, " %08x %s %c%c%c\n", i + offset,
+ xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
+ pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ');
+ }
+}
+
+static void xive_source_reset(void *dev)
+{
+ XiveSource *xsrc = XIVE_SOURCE(dev);
+
+ /* Do not clear the LSI bitmap */
+
+ /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
+ memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
+}
+
+static void xive_source_realize(DeviceState *dev, Error **errp)
+{
+ XiveSource *xsrc = XIVE_SOURCE(dev);
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'xive' not found: ");
+ return;
+ }
+
+ xsrc->xive = XIVE_NOTIFIER(obj);
+
+ if (!xsrc->nr_irqs) {
+ error_setg(errp, "Number of interrupt needs to be greater than 0");
+ return;
+ }
+
+ if (xsrc->esb_shift != XIVE_ESB_4K &&
+ xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
+ xsrc->esb_shift != XIVE_ESB_64K &&
+ xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
+ error_setg(errp, "Invalid ESB shift setting");
+ return;
+ }
+
+ xsrc->status = g_malloc0(xsrc->nr_irqs);
+ xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
+
+ memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+ &xive_source_esb_ops, xsrc, "xive.esb",
+ (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+
+ xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
+ xsrc->nr_irqs);
+
+ qemu_register_reset(xive_source_reset, dev);
+}
+
+static const VMStateDescription vmstate_xive_source = {
+ .name = TYPE_XIVE_SOURCE,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
+ VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * The default XIVE interrupt source setting for the ESB MMIOs is two
+ * 64k pages without Store EOI, to be in sync with KVM.
+ */
+static Property xive_source_properties[] = {
+ DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
+ DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
+ DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_source_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE Interrupt Source";
+ dc->props = xive_source_properties;
+ dc->realize = xive_source_realize;
+ dc->vmsd = &vmstate_xive_source;
+}
+
+static const TypeInfo xive_source_info = {
+ .name = TYPE_XIVE_SOURCE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveSource),
+ .class_init = xive_source_class_init,
+};
+
+/*
+ * XiveEND helpers
+ */
+
+void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qentries = 1 << (qsize + 10);
+ int i;
+
+ /*
+ * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
+ */
+ monitor_printf(mon, " [ ");
+ qindex = (qindex - (width - 1)) & (qentries - 1);
+ for (i = 0; i < width; i++) {
+ uint64_t qaddr = qaddr_base + (qindex << 2);
+ uint32_t qdata = -1;
+
+ if (dma_memory_read(&address_space_memory, qaddr, &qdata,
+ sizeof(qdata))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
+ HWADDR_PRIx "\n", qaddr);
+ return;
+ }
+ monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
+ be32_to_cpu(qdata));
+ qindex = (qindex + 1) & (qentries - 1);
+ }
+}
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qentries = 1 << (qsize + 10);
+
+ uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
+ uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
+
+ if (!xive_end_is_valid(end)) {
+ return;
+ }
+
+ monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
+ "% 6d/%5d ^%d", end_idx,
+ xive_end_is_valid(end) ? 'v' : '-',
+ xive_end_is_enqueue(end) ? 'q' : '-',
+ xive_end_is_notify(end) ? 'n' : '-',
+ xive_end_is_backlog(end) ? 'b' : '-',
+ xive_end_is_escalate(end) ? 'e' : '-',
+ priority, nvt, qaddr_base, qindex, qentries, qgen);
+
+ xive_end_queue_pic_print_info(end, 6, mon);
+ monitor_printf(mon, "]\n");
+}
+
+static void xive_end_enqueue(XiveEND *end, uint32_t data)
+{
+ uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
+ | be32_to_cpu(end->w3);
+ uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
+ uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
+
+ uint64_t qaddr = qaddr_base + (qindex << 2);
+ uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
+ uint32_t qentries = 1 << (qsize + 10);
+
+ if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
+ HWADDR_PRIx "\n", qaddr);
+ return;
+ }
+
+ qindex = (qindex + 1) & (qentries - 1);
+ if (qindex == 0) {
+ qgen ^= 1;
+ end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
+ }
+ end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
+}
+
+/*
+ * XIVE Router (aka. Virtualization Controller or IVRE)
+ */
+
+int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+ XiveEAS *eas)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
+}
+
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+ XiveEND *end)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_end(xrtr, end_blk, end_idx, end);
+}
+
+int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+ XiveEND *end, uint8_t word_number)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
+}
+
+int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+ XiveNVT *nvt)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
+}
+
+int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+ XiveNVT *nvt, uint8_t word_number)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+ return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
+}
+
+/*
+ * The thread context register words are in big-endian format.
+ */
+static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint32_t logic_serv)
+{
+ uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
+ uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
+ uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
+ uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
+
+ /*
+ * TODO (PowerNV): ignore mode. The low order bits of the NVT
+ * identifier are ignored in the "CAM" match.
+ */
+
+ if (format == 0) {
+ if (cam_ignore == true) {
+ /*
+ * F=0 & i=1: Logical server notification (bits ignored at
+ * the end of the NVT identifier)
+ */
+ qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
+ nvt_blk, nvt_idx);
+ return -1;
+ }
+
+ /* F=0 & i=0: Specific NVT notification */
+
+ /* TODO (PowerNV) : PHYS ring */
+
+ /* HV POOL ring */
+ if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
+ cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
+ return TM_QW2_HV_POOL;
+ }
+
+ /* OS ring */
+ if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+ cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
+ return TM_QW1_OS;
+ }
+ } else {
+ /* F=1 : User level Event-Based Branch (EBB) notification */
+
+ /* USER ring */
+ if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
+ (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
+ (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
+ (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
+ return TM_QW0_USER;
+ }
+ }
+ return -1;
+}
+
+typedef struct XiveTCTXMatch {
+ XiveTCTX *tctx;
+ uint8_t ring;
+} XiveTCTXMatch;
+
+static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
+{
+ CPUState *cs;
+
+ /*
+ * TODO (PowerNV): handle chip_id overwrite of block field for
+ * hardwired CAM compares
+ */
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ XiveTCTX *tctx = XIVE_TCTX(cpu->intc);
+ int ring;
+
+ /*
+ * HW checks that the CPU is enabled in the Physical Thread
+ * Enable Register (PTER).
+ */
+
+ /*
+ * Check the thread context CAM lines and record matches. We
+ * will handle CPU exception delivery later
+ */
+ ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
+ cam_ignore, logic_serv);
+ /*
+ * Save the context and follow on to catch duplicates, that we
+ * don't support yet.
+ */
+ if (ring != -1) {
+ if (match->tctx) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
+ "context NVT %x/%x\n", nvt_blk, nvt_idx);
+ return false;
+ }
+
+ match->ring = ring;
+ match->tctx = tctx;
+ }
+ }
+
+ if (!match->tctx) {
+ qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
+ nvt_blk, nvt_idx);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This is our simple Xive Presenter Engine model. It is merged in the
+ * Router as it does not require an extra object.
+ *
+ * It receives notification requests sent by the IVRE to find one
+ * matching NVT (or more) dispatched on the processor threads. In case
+ * of a single NVT notification, the process is abreviated and the
+ * thread is signaled if a match is found. In case of a logical server
+ * notification (bits ignored at the end of the NVT identifier), the
+ * IVPE and IVRE select a winning thread using different filters. This
+ * involves 2 or 3 exchanges on the PowerBus that the model does not
+ * support.
+ *
+ * The parameters represent what is sent on the PowerBus
+ */
+static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv)
+{
+ XiveNVT nvt;
+ XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
+ bool found;
+
+ /* NVT cache lookup */
+ if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
+ nvt_blk, nvt_idx);
+ return;
+ }
+
+ if (!xive_nvt_is_valid(&nvt)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
+ nvt_blk, nvt_idx);
+ return;
+ }
+
+ found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
+ priority, logic_serv, &match);
+ if (found) {
+ ipb_update(&match.tctx->regs[match.ring], priority);
+ xive_tctx_notify(match.tctx, match.ring);
+ return;
+ }
+
+ /* Record the IPB in the associated NVT structure */
+ ipb_update((uint8_t *) &nvt.w4, priority);
+ xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
+
+ /*
+ * If no matching NVT is dispatched on a HW thread :
+ * - update the NVT structure if backlog is activated
+ * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
+ * activated
+ */
+}
+
+/*
+ * An END trigger can come from an event trigger (IPI or HW) or from
+ * another chip. We don't model the PowerBus but the END trigger
+ * message has the same parameters than in the function below.
+ */
+static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
+ uint32_t end_idx, uint32_t end_data)
+{
+ XiveEND end;
+ uint8_t priority;
+ uint8_t format;
+
+ /* END cache lookup */
+ if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+ end_idx);
+ return;
+ }
+
+ if (!xive_end_is_valid(&end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+ end_blk, end_idx);
+ return;
+ }
+
+ if (xive_end_is_enqueue(&end)) {
+ xive_end_enqueue(&end, end_data);
+ /* Enqueuing event data modifies the EQ toggle and index */
+ xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ /*
+ * The W7 format depends on the F bit in W6. It defines the type
+ * of the notification :
+ *
+ * F=0 : single or multiple NVT notification
+ * F=1 : User level Event-Based Branch (EBB) notification, no
+ * priority
+ */
+ format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
+ priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
+
+ /* The END is masked */
+ if (format == 0 && priority == 0xff) {
+ return;
+ }
+
+ /*
+ * Check the END ESn (Event State Buffer for notification) for
+ * even futher coalescing in the Router
+ */
+ if (!xive_end_is_notify(&end)) {
+ uint8_t pq = xive_get_field32(END_W1_ESn, end.w1);
+ bool notify = xive_esb_trigger(&pq);
+
+ if (pq != xive_get_field32(END_W1_ESn, end.w1)) {
+ end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq);
+ xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ /* ESn[Q]=1 : end of notification */
+ if (!notify) {
+ return;
+ }
+ }
+
+ /*
+ * Follows IVPE notification
+ */
+ xive_presenter_notify(xrtr, format,
+ xive_get_field32(END_W6_NVT_BLOCK, end.w6),
+ xive_get_field32(END_W6_NVT_INDEX, end.w6),
+ xive_get_field32(END_W7_F0_IGNORE, end.w7),
+ priority,
+ xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
+
+ /* TODO: Auto EOI. */
+}
+
+static void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
+{
+ XiveRouter *xrtr = XIVE_ROUTER(xn);
+ uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn);
+ uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn);
+ XiveEAS eas;
+
+ /* EAS cache lookup */
+ if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
+ return;
+ }
+
+ /*
+ * The IVRE checks the State Bit Cache at this point. We skip the
+ * SBC lookup because the state bits of the sources are modeled
+ * internally in QEMU.
+ */
+
+ if (!xive_eas_is_valid(&eas)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
+ return;
+ }
+
+ if (xive_eas_is_masked(&eas)) {
+ /* Notification completed */
+ return;
+ }
+
+ /*
+ * The event trigger becomes an END trigger
+ */
+ xive_router_end_notify(xrtr,
+ xive_get_field64(EAS_END_BLOCK, eas.w),
+ xive_get_field64(EAS_END_INDEX, eas.w),
+ xive_get_field64(EAS_END_DATA, eas.w));
+}
+
+static void xive_router_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+
+ dc->desc = "XIVE Router Engine";
+ xnc->notify = xive_router_notify;
+}
+
+static const TypeInfo xive_router_info = {
+ .name = TYPE_XIVE_ROUTER,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .abstract = true,
+ .class_size = sizeof(XiveRouterClass),
+ .class_init = xive_router_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_NOTIFIER },
+ { }
+ }
+};
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
+{
+ if (!xive_eas_is_valid(eas)) {
+ return;
+ }
+
+ monitor_printf(mon, " %08x %s end:%02x/%04x data:%08x\n",
+ lisn, xive_eas_is_masked(eas) ? "M" : " ",
+ (uint8_t) xive_get_field64(EAS_END_BLOCK, eas->w),
+ (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
+ (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
+}
+
+/*
+ * END ESB MMIO loads
+ */
+static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
+{
+ XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
+ uint32_t offset = addr & 0xFFF;
+ uint8_t end_blk;
+ uint32_t end_idx;
+ XiveEND end;
+ uint32_t end_esmask;
+ uint8_t pq;
+ uint64_t ret = -1;
+
+ end_blk = xsrc->block_id;
+ end_idx = addr >> (xsrc->esb_shift + 1);
+
+ if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+ end_idx);
+ return -1;
+ }
+
+ if (!xive_end_is_valid(&end)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+ end_blk, end_idx);
+ return -1;
+ }
+
+ end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
+ pq = xive_get_field32(end_esmask, end.w1);
+
+ switch (offset) {
+ case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+ ret = xive_esb_eoi(&pq);
+
+ /* Forward the source event notification for routing ?? */
+ break;
+
+ case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+ ret = pq;
+ break;
+
+ case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+ case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+ case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+ case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+ ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
+ offset);
+ return -1;
+ }
+
+ if (pq != xive_get_field32(end_esmask, end.w1)) {
+ end.w1 = xive_set_field32(end_esmask, end.w1, pq);
+ xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
+ }
+
+ return ret;
+}
+
+/*
+ * END ESB MMIO stores are invalid
+ */
+static void xive_end_source_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
+ HWADDR_PRIx"\n", addr);
+}
+
+static const MemoryRegionOps xive_end_source_ops = {
+ .read = xive_end_source_read,
+ .write = xive_end_source_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+static void xive_end_source_realize(DeviceState *dev, Error **errp)
+{
+ XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
+ Object *obj;
+ Error *local_err = NULL;
+
+ obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
+ if (!obj) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "required link 'xive' not found: ");
+ return;
+ }
+
+ xsrc->xrtr = XIVE_ROUTER(obj);
+
+ if (!xsrc->nr_ends) {
+ error_setg(errp, "Number of interrupt needs to be greater than 0");
+ return;
+ }
+
+ if (xsrc->esb_shift != XIVE_ESB_4K &&
+ xsrc->esb_shift != XIVE_ESB_64K) {
+ error_setg(errp, "Invalid ESB shift setting");
+ return;
+ }
+
+ /*
+ * Each END is assigned an even/odd pair of MMIO pages, the even page
+ * manages the ESn field while the odd page manages the ESe field.
+ */
+ memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+ &xive_end_source_ops, xsrc, "xive.end",
+ (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
+}
+
+static Property xive_end_source_properties[] = {
+ DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0),
+ DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
+ DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_end_source_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "XIVE END Source";
+ dc->props = xive_end_source_properties;
+ dc->realize = xive_end_source_realize;
+}
+
+static const TypeInfo xive_end_source_info = {
+ .name = TYPE_XIVE_END_SOURCE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiveENDSource),
+ .class_init = xive_end_source_class_init,
+};
+
+/*
+ * XIVE Fabric
+ */
+static const TypeInfo xive_fabric_info = {
+ .name = TYPE_XIVE_NOTIFIER,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(XiveNotifierClass),
+};
+
+static void xive_register_types(void)
+{
+ type_register_static(&xive_source_info);
+ type_register_static(&xive_fabric_info);
+ type_register_static(&xive_router_info);
+ type_register_static(&xive_end_source_info);
+ type_register_static(&xive_tctx_info);
+}
+
+type_init(xive_register_types)
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index e6747fc..b20fea0 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -685,7 +685,7 @@ static void ppce500_cpu_reset(void *opaque)
}
static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
- qemu_irq **irqs)
+ IrqLines *irqs)
{
DeviceState *dev;
SysBusDevice *s;
@@ -705,7 +705,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
k = 0;
for (i = 0; i < smp_cpus; i++) {
for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
- sysbus_connect_irq(s, k++, irqs[i][j]);
+ sysbus_connect_irq(s, k++, irqs[i].irq[j]);
}
}
@@ -713,7 +713,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
}
static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
- qemu_irq **irqs, Error **errp)
+ IrqLines *irqs, Error **errp)
{
Error *err = NULL;
DeviceState *dev;
@@ -742,7 +742,7 @@ static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms,
MemoryRegion *ccsr,
- qemu_irq **irqs)
+ IrqLines *irqs)
{
MachineState *machine = MACHINE(pms);
const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
@@ -806,15 +806,14 @@ void ppce500_init(MachineState *machine)
/* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
* 4 respectively */
unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
- qemu_irq **irqs;
+ IrqLines *irqs;
DeviceState *dev, *mpicdev;
CPUPPCState *firstenv = NULL;
MemoryRegion *ccsr_addr_space;
SysBusDevice *s;
PPCE500CCSRState *ccsr;
- irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
- irqs[0] = g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+ irqs = g_new0(IrqLines, smp_cpus);
for (i = 0; i < smp_cpus; i++) {
PowerPCCPU *cpu;
CPUState *cs;
@@ -834,10 +833,9 @@ void ppce500_init(MachineState *machine)
firstenv = env;
}
- irqs[i] = irqs[0] + (i * OPENPIC_OUTPUT_NB);
input = (qemu_irq *)env->irq_inputs;
- irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
- irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
+ irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
+ irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 7e45afa..bb19eab 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -115,7 +115,7 @@ static void ppc_core99_init(MachineState *machine)
PowerPCCPU *cpu = NULL;
CPUPPCState *env = NULL;
char *filename;
- qemu_irq **openpic_irqs;
+ IrqLines *openpic_irqs;
int linux_boot, i, j, k;
MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1);
hwaddr kernel_base, initrd_base, cmdline_base = 0;
@@ -248,41 +248,37 @@ static void ppc_core99_init(MachineState *machine)
memory_region_add_subregion(get_system_memory(), 0xf8000000,
sysbus_mmio_get_region(s, 0));
- openpic_irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
- openpic_irqs[0] =
- g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
+ openpic_irqs = g_new0(IrqLines, smp_cpus);
for (i = 0; i < smp_cpus; i++) {
/* Mac99 IRQ connection between OpenPIC outputs pins
* and PowerPC input pins
*/
switch (PPC_INPUT(env)) {
case PPC_FLAGS_INPUT_6xx:
- openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
- openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP];
/* Not connected ? */
- openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
/* Check this */
- openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET];
break;
#if defined(TARGET_PPC64)
case PPC_FLAGS_INPUT_970:
- openpic_irqs[i] = openpic_irqs[0] + (i * OPENPIC_OUTPUT_NB);
- openpic_irqs[i][OPENPIC_OUTPUT_INT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_CINT] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
- openpic_irqs[i][OPENPIC_OUTPUT_MCK] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP];
/* Not connected ? */
- openpic_irqs[i][OPENPIC_OUTPUT_DEBUG] = NULL;
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
/* Check this */
- openpic_irqs[i][OPENPIC_OUTPUT_RESET] =
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET];
break;
#endif /* defined(TARGET_PPC64) */
@@ -299,7 +295,7 @@ static void ppc_core99_init(MachineState *machine)
k = 0;
for (i = 0; i < smp_cpus; i++) {
for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
- sysbus_connect_irq(s, k++, openpic_irqs[i][j]);
+ sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]);
}
}
g_free(openpic_irqs);
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
index 1b0a0a8..f47b15f 100644
--- a/hw/ppc/ppc405_boards.c
+++ b/hw/ppc/ppc405_boards.c
@@ -149,7 +149,7 @@ static void ref405ep_init(MachineState *machine)
MemoryRegion *bios;
MemoryRegion *sram = g_new(MemoryRegion, 1);
ram_addr_t bdloc;
- MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
hwaddr ram_bases[2], ram_sizes[2];
target_ulong sram_size;
long bios_size;
@@ -448,7 +448,7 @@ static void taihu_405ep_init(MachineState *machine)
qemu_irq *pic;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *bios;
- MemoryRegion *ram_memories = g_malloc(2 * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
MemoryRegion *ram = g_malloc0(sizeof(*ram));
hwaddr ram_bases[2], ram_sizes[2];
long bios_size;
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index 5c58415..e1aadf1 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -1519,7 +1519,7 @@ CPUPPCState *ppc405cr_init(MemoryRegion *address_space_mem,
/* OBP arbitrer */
ppc4xx_opba_init(0xef600600);
/* Universal interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] =
@@ -1877,7 +1877,7 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
/* Initialize timers */
ppc_booke_timers_init(cpu, sysclk, 0);
/* Universal interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] =
((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] =
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index f5720f9..b8aa55d 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -169,8 +169,7 @@ static void bamboo_init(MachineState *machine)
unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *isa = g_new(MemoryRegion, 1);
- MemoryRegion *ram_memories
- = g_malloc(PPC440EP_SDRAM_NR_BANKS * sizeof(*ram_memories));
+ MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS);
hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS];
hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS];
qemu_irq *pic;
@@ -200,7 +199,7 @@ static void bamboo_init(MachineState *machine)
ppc_dcr_init(env, NULL, NULL);
/* interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
pic = ppcuic_init(env, irqs, 0x0C0, 0, 1);
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
index 5aac58f..4b051c0 100644
--- a/hw/ppc/sam460ex.c
+++ b/hw/ppc/sam460ex.c
@@ -430,7 +430,7 @@ static void sam460ex_init(MachineState *machine)
ppc4xx_plb_init(env);
/* interrupt controllers */
- irqs = g_malloc0(sizeof(*irqs) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
uic[0] = ppcuic_init(env, irqs, 0xc0, 0, 1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 55be0f5..19a07c5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -150,7 +150,7 @@ static void pre_2_10_vmstate_unregister_dummy_icp(int i)
(void *)(uintptr_t) i);
}
-static int xics_max_server_number(sPAPRMachineState *spapr)
+int spapr_max_server_number(sPAPRMachineState *spapr)
{
assert(spapr->vsmt);
return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads);
@@ -889,8 +889,6 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
/* ibm,associativity-lookup-arrays */
buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
cur_index = int_buf = g_malloc0(buf_len);
-
- cur_index = int_buf;
int_buf[0] = cpu_to_be32(nr_nodes);
int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
cur_index += 2;
@@ -1033,7 +1031,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
cpu_to_be32(0),
cpu_to_be32(0),
cpu_to_be32(0),
- cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0),
+ cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1),
};
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
@@ -1097,15 +1095,18 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
spapr_dt_rtas_tokens(fdt, rtas);
}
-/* Prepare ibm,arch-vec-5-platform-support, which indicates the MMU features
- * that the guest may request and thus the valid values for bytes 24..26 of
- * option vector 5: */
-static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
+/*
+ * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU
+ * and the XIVE features that the guest may request and thus the valid
+ * values for bytes 23..26 of option vector 5:
+ */
+static void spapr_dt_ov5_platform_support(sPAPRMachineState *spapr, void *fdt,
+ int chosen)
{
PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
char val[2 * 4] = {
- 23, 0x00, /* Xive mode, filled in below. */
+ 23, spapr->irq->ov5, /* Xive mode. */
24, 0x00, /* Hash/Radix, filled in below. */
25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
26, 0x40, /* Radix options: GTSE == yes. */
@@ -1113,7 +1114,11 @@ static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
first_ppc_cpu->compat_pvr)) {
- /* If we're in a pre POWER9 compat mode then the guest should do hash */
+ /*
+ * If we're in a pre POWER9 compat mode then the guest should
+ * do hash and use the legacy interrupt mode
+ */
+ val[1] = 0x00; /* XICS */
val[3] = 0x00; /* Hash */
} else if (kvm_enabled()) {
if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
@@ -1191,7 +1196,7 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
_FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
}
- spapr_dt_ov5_platform_support(fdt, chosen);
+ spapr_dt_ov5_platform_support(spapr, fdt, chosen);
g_free(stdout_path);
g_free(bootlist);
@@ -1270,7 +1275,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
- spapr_dt_xics(xics_max_server_number(spapr), fdt, PHANDLE_XICP);
+ spapr->irq->dt_populate(spapr, spapr_max_server_number(spapr), fdt,
+ PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1290,7 +1296,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
}
QLIST_FOREACH(phb, &spapr->phbs, list) {
- ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt, smc->irq->nr_msis);
+ ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt,
+ spapr->irq->nr_msis);
if (ret < 0) {
error_report("couldn't setup PCI devices in fdt");
exit(1);
@@ -1620,6 +1627,12 @@ static void spapr_machine_reset(void)
qemu_devices_reset();
+ /*
+ * This is fixing some of the default configuration of the XIVE
+ * devices. To be called after the reset of the machine devices.
+ */
+ spapr_irq_reset(spapr, &error_fatal);
+
/* DRC reset may cause a device to be unplugged. This will cause troubles
* if this device is used by another device (eg, a running vhost backend
* will crash QEMU if the DIMM holding the vring goes away). To avoid such
@@ -1731,14 +1744,6 @@ static int spapr_post_load(void *opaque, int version_id)
return err;
}
- if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
- CPUState *cs;
- CPU_FOREACH(cs) {
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- icp_resend(ICP(cpu->intc));
- }
- }
-
/* In earlier versions, there was no separate qdev for the PAPR
* RTC, so the RTC offset was stored directly in sPAPREnvironment.
* So when migrating from those versions, poke the incoming offset
@@ -1759,6 +1764,11 @@ static int spapr_post_load(void *opaque, int version_id)
}
}
+ err = spapr_irq_post_load(spapr, version_id);
+ if (err) {
+ return err;
+ }
+
return err;
}
@@ -2466,15 +2476,10 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
boot_cores_nr = possible_cpus->len;
}
- /* VSMT must be set in order to be able to compute VCPU ids, ie to
- * call xics_max_server_number() or spapr_vcpu_id().
- */
- spapr_set_vsmt_mode(spapr, &error_fatal);
-
if (smc->pre_2_10_has_unused_icps) {
int i;
- for (i = 0; i < xics_max_server_number(spapr); i++) {
+ for (i = 0; i < spapr_max_server_number(spapr); i++) {
/* Dummy entries get deregistered when real ICPState objects
* are registered during CPU core hotplug.
*/
@@ -2593,8 +2598,14 @@ static void spapr_machine_init(MachineState *machine)
/* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
+ /*
+ * VSMT must be set in order to be able to compute VCPU ids, ie to
+ * call spapr_max_server_number() or spapr_vcpu_id().
+ */
+ spapr_set_vsmt_mode(spapr, &error_fatal);
+
/* Set up Interrupt Controller before we create the VCPUs */
- smc->irq->init(spapr, &error_fatal);
+ spapr_irq_init(spapr, &error_fatal);
/* Set up containers for ibm,client-architecture-support negotiated options
*/
@@ -2621,6 +2632,17 @@ static void spapr_machine_init(MachineState *machine)
/* advertise support for ibm,dyamic-memory-v2 */
spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
+ /* advertise XIVE on POWER9 machines */
+ if (spapr->irq->ov5 & SPAPR_OV5_XIVE_EXPLOIT) {
+ if (ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
+ 0, spapr->max_compat_pvr)) {
+ spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
+ } else {
+ error_report("XIVE-only machines require a POWER9 CPU");
+ exit(1);
+ }
+ }
+
/* init CPUs */
spapr_init_cpus(spapr);
@@ -3031,9 +3053,38 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
visit_type_uint32(v, name, (uint32_t *)opaque, errp);
}
+static char *spapr_get_ic_mode(Object *obj, Error **errp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+ if (spapr->irq == &spapr_irq_xics_legacy) {
+ return g_strdup("legacy");
+ } else if (spapr->irq == &spapr_irq_xics) {
+ return g_strdup("xics");
+ } else if (spapr->irq == &spapr_irq_xive) {
+ return g_strdup("xive");
+ }
+ g_assert_not_reached();
+}
+
+static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
+{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+ /* The legacy IRQ backend can not be set */
+ if (strcmp(value, "xics") == 0) {
+ spapr->irq = &spapr_irq_xics;
+ } else if (strcmp(value, "xive") == 0) {
+ spapr->irq = &spapr_irq_xive;
+ } else {
+ error_setg(errp, "Bad value for \"ic-mode\" property");
+ }
+}
+
static void spapr_instance_init(Object *obj)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
spapr->htab_fd = -1;
spapr->use_hotplug_event_source = true;
@@ -3067,6 +3118,14 @@ static void spapr_instance_init(Object *obj)
" the host's SMT mode", &error_abort);
object_property_add_bool(obj, "vfio-no-msix-emulation",
spapr_get_msix_emulation, NULL, NULL);
+
+ /* The machine class defines the default interrupt controller mode */
+ spapr->irq = smc->irq;
+ object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
+ spapr_set_ic_mode, NULL);
+ object_property_set_description(obj, "ic-mode",
+ "Specifies the interrupt controller mode (xics, xive)",
+ NULL);
}
static void spapr_machine_finalizefn(Object *obj)
@@ -3789,9 +3848,8 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
- smc->irq->print_info(spapr, mon);
+ spapr->irq->print_info(spapr, mon);
}
int spapr_get_vcpu_id(PowerPCCPU *cpu)
@@ -3873,7 +3931,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
hc->unplug = spapr_machine_device_unplug;
smc->dr_lmb_enabled = true;
- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
mc->has_hotpluggable_cpus = true;
smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
fwc->get_dev_path = spapr_get_fw_dev_path;
@@ -3970,6 +4028,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
{
spapr_machine_4_0_class_options(mc);
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_3_1);
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
}
DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 2398ce6..8266643 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -11,7 +11,6 @@
#include "hw/ppc/spapr_cpu_core.h"
#include "target/ppc/cpu.h"
#include "hw/ppc/spapr.h"
-#include "hw/ppc/xics.h" /* for icp_create() - to be removed */
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
@@ -233,8 +232,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
- cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
- &local_err);
+ cpu->intc = spapr->irq->cpu_intc_create(spapr, OBJECT(cpu), &local_err);
if (local_err) {
goto error_unregister;
}
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 1b0880a..b56466f 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -93,7 +93,7 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
if (!table) {
*fd = -1;
- table = g_malloc0(nb_table * sizeof(uint64_t));
+ table = g_new0(uint64_t, nb_table);
}
trace_spapr_iommu_new_table(liobn, table, *fd);
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index e77b94c..7b3b5af 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -12,6 +12,7 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xics.h"
#include "sysemu/kvm.h"
@@ -93,15 +94,9 @@ error:
static void spapr_irq_init_xics(sPAPRMachineState *spapr, Error **errp)
{
MachineState *machine = MACHINE(spapr);
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
- int nr_irqs = smc->irq->nr_irqs;
+ int nr_irqs = spapr->irq->nr_irqs;
Error *local_err = NULL;
- /* Initialize the MSI IRQ allocator. */
- if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
- spapr_irq_msi_init(spapr, smc->irq->nr_msis);
- }
-
if (kvm_enabled()) {
if (machine_kernel_irqchip_allowed(machine) &&
!xics_kvm_init(spapr, &local_err)) {
@@ -195,6 +190,24 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
ics_pic_print_info(spapr->ics, mon);
}
+static Object *spapr_irq_cpu_intc_create_xics(sPAPRMachineState *spapr,
+ Object *cpu, Error **errp)
+{
+ return icp_create(cpu, spapr->icp_type, XICS_FABRIC(spapr), errp);
+}
+
+static int spapr_irq_post_load_xics(sPAPRMachineState *spapr, int version_id)
+{
+ if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ icp_resend(ICP(cpu->intc));
+ }
+ }
+ return 0;
+}
+
#define SPAPR_IRQ_XICS_NR_IRQS 0x1000
#define SPAPR_IRQ_XICS_NR_MSIS \
(XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -202,37 +215,184 @@ static void spapr_irq_print_info_xics(sPAPRMachineState *spapr, Monitor *mon)
sPAPRIrq spapr_irq_xics = {
.nr_irqs = SPAPR_IRQ_XICS_NR_IRQS,
.nr_msis = SPAPR_IRQ_XICS_NR_MSIS,
+ .ov5 = SPAPR_OV5_XIVE_LEGACY,
.init = spapr_irq_init_xics,
.claim = spapr_irq_claim_xics,
.free = spapr_irq_free_xics,
.qirq = spapr_qirq_xics,
.print_info = spapr_irq_print_info_xics,
+ .dt_populate = spapr_dt_xics,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+ .post_load = spapr_irq_post_load_xics,
+};
+
+/*
+ * XIVE IRQ backend.
+ */
+static void spapr_irq_init_xive(sPAPRMachineState *spapr, Error **errp)
+{
+ MachineState *machine = MACHINE(spapr);
+ uint32_t nr_servers = spapr_max_server_number(spapr);
+ DeviceState *dev;
+ int i;
+
+ /* KVM XIVE device not yet available */
+ if (kvm_enabled()) {
+ if (machine_kernel_irqchip_required(machine)) {
+ error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
+ return;
+ }
+ }
+
+ dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
+ qdev_prop_set_uint32(dev, "nr-irqs", spapr->irq->nr_irqs);
+ /*
+ * 8 XIVE END structures per CPU. One for each available priority
+ */
+ qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
+ qdev_init_nofail(dev);
+
+ spapr->xive = SPAPR_XIVE(dev);
+
+ /* Enable the CPU IPIs */
+ for (i = 0; i < nr_servers; ++i) {
+ spapr_xive_irq_claim(spapr->xive, SPAPR_IRQ_IPI + i, false);
+ }
+
+ spapr_xive_hcall_init(spapr);
+}
+
+static int spapr_irq_claim_xive(sPAPRMachineState *spapr, int irq, bool lsi,
+ Error **errp)
+{
+ if (!spapr_xive_irq_claim(spapr->xive, irq, lsi)) {
+ error_setg(errp, "IRQ %d is invalid", irq);
+ return -1;
+ }
+ return 0;
+}
+
+static void spapr_irq_free_xive(sPAPRMachineState *spapr, int irq, int num)
+{
+ int i;
+
+ for (i = irq; i < irq + num; ++i) {
+ spapr_xive_irq_free(spapr->xive, i);
+ }
+}
+
+static qemu_irq spapr_qirq_xive(sPAPRMachineState *spapr, int irq)
+{
+ return spapr_xive_qirq(spapr->xive, irq);
+}
+
+static void spapr_irq_print_info_xive(sPAPRMachineState *spapr,
+ Monitor *mon)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ xive_tctx_pic_print_info(XIVE_TCTX(cpu->intc), mon);
+ }
+
+ spapr_xive_pic_print_info(spapr->xive, mon);
+}
+
+static Object *spapr_irq_cpu_intc_create_xive(sPAPRMachineState *spapr,
+ Object *cpu, Error **errp)
+{
+ Object *obj = xive_tctx_create(cpu, XIVE_ROUTER(spapr->xive), errp);
+
+ /*
+ * (TCG) Early setting the OS CAM line for hotplugged CPUs as they
+ * don't benificiate from the reset of the XIVE IRQ backend
+ */
+ spapr_xive_set_tctx_os_cam(XIVE_TCTX(obj));
+ return obj;
+}
+
+static int spapr_irq_post_load_xive(sPAPRMachineState *spapr, int version_id)
+{
+ return 0;
+}
+
+static void spapr_irq_reset_xive(sPAPRMachineState *spapr, Error **errp)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ /* (TCG) Set the OS CAM line of the thread interrupt context. */
+ spapr_xive_set_tctx_os_cam(XIVE_TCTX(cpu->intc));
+ }
+}
+
+/*
+ * XIVE uses the full IRQ number space. Set it to 8K to be compatible
+ * with XICS.
+ */
+
+#define SPAPR_IRQ_XIVE_NR_IRQS 0x2000
+#define SPAPR_IRQ_XIVE_NR_MSIS (SPAPR_IRQ_XIVE_NR_IRQS - SPAPR_IRQ_MSI)
+
+sPAPRIrq spapr_irq_xive = {
+ .nr_irqs = SPAPR_IRQ_XIVE_NR_IRQS,
+ .nr_msis = SPAPR_IRQ_XIVE_NR_MSIS,
+ .ov5 = SPAPR_OV5_XIVE_EXPLOIT,
+
+ .init = spapr_irq_init_xive,
+ .claim = spapr_irq_claim_xive,
+ .free = spapr_irq_free_xive,
+ .qirq = spapr_qirq_xive,
+ .print_info = spapr_irq_print_info_xive,
+ .dt_populate = spapr_dt_xive,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xive,
+ .post_load = spapr_irq_post_load_xive,
+ .reset = spapr_irq_reset_xive,
};
/*
* sPAPR IRQ frontend routines for devices
*/
+void spapr_irq_init(sPAPRMachineState *spapr, Error **errp)
+{
+ /* Initialize the MSI IRQ allocator. */
+ if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+ spapr_irq_msi_init(spapr, spapr->irq->nr_msis);
+ }
+
+ spapr->irq->init(spapr, errp);
+}
int spapr_irq_claim(sPAPRMachineState *spapr, int irq, bool lsi, Error **errp)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
- return smc->irq->claim(spapr, irq, lsi, errp);
+ return spapr->irq->claim(spapr, irq, lsi, errp);
}
void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-
- smc->irq->free(spapr, irq, num);
+ spapr->irq->free(spapr, irq, num);
}
qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq)
{
- sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ return spapr->irq->qirq(spapr, irq);
+}
+
+int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id)
+{
+ return spapr->irq->post_load(spapr, version_id);
+}
- return smc->irq->qirq(spapr, irq);
+void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp)
+{
+ if (spapr->irq->reset) {
+ spapr->irq->reset(spapr, errp);
+ }
}
/*
@@ -295,10 +455,14 @@ int spapr_irq_find(sPAPRMachineState *spapr, int num, bool align, Error **errp)
sPAPRIrq spapr_irq_xics_legacy = {
.nr_irqs = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
.nr_msis = SPAPR_IRQ_XICS_LEGACY_NR_IRQS,
+ .ov5 = SPAPR_OV5_XIVE_LEGACY,
.init = spapr_irq_init_xics,
.claim = spapr_irq_claim_xics,
.free = spapr_irq_free_xics,
.qirq = spapr_qirq_xics,
.print_info = spapr_irq_print_info_xics,
+ .dt_populate = spapr_dt_xics,
+ .cpu_intc_create = spapr_irq_cpu_intc_create_xics,
+ .post_load = spapr_irq_post_load_xics,
};
diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c
index 329feb1..cb8a410 100644
--- a/hw/ppc/spapr_rtas_ddw.c
+++ b/hw/ppc/spapr_rtas_ddw.c
@@ -96,9 +96,8 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
- uint64_t buid, max_window_size;
+ uint64_t buid;
uint32_t avail, addr, pgmask = 0;
- MachineState *machine = MACHINE(spapr);
if ((nargs != 3) || (nret != 5)) {
goto param_error_exit;
@@ -114,27 +113,15 @@ static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
/* Translate page mask to LoPAPR format */
pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask);
- /*
- * This is "Largest contiguous block of TCEs allocated specifically
- * for (that is, are reserved for) this PE".
- * Return the maximum number as maximum supported RAM size was in 4K pages.
- */
- if (machine->ram_size == machine->maxram_size) {
- max_window_size = machine->ram_size;
- } else {
- max_window_size = machine->device_memory->base +
- memory_region_size(&machine->device_memory->mr);
- }
-
avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, avail);
- rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT);
+ rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */
rtas_st(rets, 3, pgmask);
rtas_st(rets, 4, 0); /* DMA migration mask, not supported */
- trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask);
+ trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask);
return;
param_error_exit:
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 840d4a3..7e8a9ad 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -730,7 +730,7 @@ void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt)
}
/* Copy out into an array of pointers */
- qdevs = g_malloc(sizeof(qdev) * num);
+ qdevs = g_new(DeviceState *, num);
num = 0;
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
qdevs[num++] = kid->child;
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index ee9b4b4..5177120 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -105,7 +105,7 @@ static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
ppc_dcr_init(env, NULL, NULL);
/* interrupt controller */
- irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
+ irqs = g_new0(qemu_irq, PPCUIC_OUTPUT_NB);
irqs[PPCUIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT];
irqs[PPCUIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT];
ppcuic_init(env, irqs, 0x0C0, 0, 1);