author     Peter Maydell <peter.maydell@linaro.org>  2020-02-03 09:52:42 +0000
committer  Peter Maydell <peter.maydell@linaro.org>  2020-02-03 09:52:43 +0000
commit     035b21977ce1791a630c5cbf46e482e54552e05b (patch)
tree       c7ad2235c6fc70fa1fb88e613f4134f0fec8f5cb /hw
parent     28db64fce555a03b4ca256d5b6f4290abdfbd9e8 (diff)
parent     63d57c8f91d0d0e62fc4d91db6340a662b36a3c0 (diff)
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.0-20200203' into staging
ppc patch queue 2020-02-03

This pull request supersedes ppc-for-5.0-20200131. The only changes are one extra patch to suppress some irritating warnings during tests under TCG, and an extra Tested-by in one of the other patches.

Here's the next batch of patches for ppc and associated machine types. Highlights include:

 * Remove the deprecated "prep" machine type and its OpenHackware firmware
 * Add TCG emulation of the msgsndp etc. supervisor privileged doorbell instructions
 * Allow "pnv" machine type to run Hostboot style firmwares
 * Add a virtual TPM device for spapr machines
 * Implement devices for POWER8 PHB3 and POWER9 PHB4 host bridges for the pnv machine type
 * Use faster Spectre mitigation by default for POWER9 DD2.3 machines
 * Introduce Firmware Assisted NMI dump facility for spapr machines
 * Fix a performance regression with load/store multiple instructions in TCG

as well as some other assorted cleanups and fixes.

# gpg: Signature made Mon 03 Feb 2020 03:30:24 GMT
# gpg:                using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg:                 aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-5.0-20200203: (35 commits)
  tests: Silence various warnings with pseries
  target/ppc: Use probe_write for DCBZ
  target/ppc: Remove redundant mask in DCBZ
  target/ppc: Use probe_access for LMW, STMW
  target/ppc: Use probe_access for LSW, STSW
  ppc: spapr: Activate the FWNMI functionality
  migration: Include migration support for machine check handling
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls
  target/ppc: Build rtas error log upon an MCE
  target/ppc: Handle NMI guest exit
  ppc: spapr: Introduce FWNMI capability
  Wrapper function to wait on condition for the main loop mutex
  target/ppc/cpu.h: Put macro parameter in parentheses
  spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine
  ppc/pnv: change the PowerNV machine devices to be non user creatable
  ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge
  ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge
  docs/specs/tpm: reST-ify TPM documentation
  hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES config
  tpm_spapr: Support suspend and resume
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/intc/xics.c14
-rw-r--r--hw/pci-host/Makefile.objs2
-rw-r--r--hw/pci-host/pnv_phb3.c1197
-rw-r--r--hw/pci-host/pnv_phb3_msi.c349
-rw-r--r--hw/pci-host/pnv_phb3_pbcq.c358
-rw-r--r--hw/pci-host/pnv_phb4.c1439
-rw-r--r--hw/pci-host/pnv_phb4_pec.c595
-rw-r--r--hw/ppc/Kconfig2
-rw-r--r--hw/ppc/pnv.c204
-rw-r--r--hw/ppc/pnv_core.c33
-rw-r--r--hw/ppc/pnv_homer.c1
-rw-r--r--hw/ppc/pnv_lpc.c6
-rw-r--r--hw/ppc/pnv_occ.c1
-rw-r--r--hw/ppc/pnv_pnor.c6
-rw-r--r--hw/ppc/ppc.c18
-rw-r--r--hw/ppc/prep.c384
-rw-r--r--hw/ppc/spapr.c63
-rw-r--r--hw/ppc/spapr_caps.c49
-rw-r--r--hw/ppc/spapr_events.c269
-rw-r--r--hw/ppc/spapr_hcall.c20
-rw-r--r--hw/ppc/spapr_rtas.c87
-rw-r--r--hw/ppc/spapr_vio.c11
-rw-r--r--hw/ppc/virtex_ml507.c7
-rw-r--r--hw/tpm/Kconfig6
-rw-r--r--hw/tpm/Makefile.objs1
-rw-r--r--hw/tpm/tpm_spapr.c429
-rw-r--r--hw/tpm/tpm_tis.c32
-rw-r--r--hw/tpm/tpm_util.c25
-rw-r--r--hw/tpm/tpm_util.h3
-rw-r--r--hw/tpm/trace-events16
30 files changed, 5163 insertions, 464 deletions
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 785b607..c5d507e 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -217,7 +217,7 @@ void icp_eoi(ICPState *icp, uint32_t xirr)
}
}
-static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
{
ICPState *icp = xics_icp_get(ics->xics, server);
@@ -512,8 +512,14 @@ void ics_write_xive(ICSState *ics, int srcno, int server,
static void ics_reject(ICSState *ics, uint32_t nr)
{
+ ICSStateClass *isc = ICS_GET_CLASS(ics);
ICSIRQState *irq = ics->irqs + nr - ics->offset;
+ if (isc->reject) {
+ isc->reject(ics, nr);
+ return;
+ }
+
trace_xics_ics_reject(nr, nr - ics->offset);
if (irq->flags & XICS_FLAGS_IRQ_MSI) {
irq->status |= XICS_STATUS_REJECTED;
@@ -524,8 +530,14 @@ static void ics_reject(ICSState *ics, uint32_t nr)
void ics_resend(ICSState *ics)
{
+ ICSStateClass *isc = ICS_GET_CLASS(ics);
int i;
+ if (isc->resend) {
+ isc->resend(ics);
+ return;
+ }
+
for (i = 0; i < ics->nr_irqs; i++) {
/* FIXME: filter by server#? */
if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
index 9c466fa..8c87e84 100644
--- a/hw/pci-host/Makefile.objs
+++ b/hw/pci-host/Makefile.objs
@@ -20,3 +20,5 @@ common-obj-$(CONFIG_PCI_EXPRESS_GENERIC_BRIDGE) += gpex.o
common-obj-$(CONFIG_PCI_EXPRESS_XILINX) += xilinx-pcie.o
common-obj-$(CONFIG_PCI_EXPRESS_DESIGNWARE) += designware.o
+obj-$(CONFIG_POWERNV) += pnv_phb4.o pnv_phb4_pec.o
+obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_msi.o pnv_phb3_pbcq.o
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
new file mode 100644
index 0000000..74618fa
--- /dev/null
+++ b/hw/pci-host/pnv_phb3.c
@@ -0,0 +1,1197 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+#define phb3_error(phb, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n", \
+ (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
+
+static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)
+{
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+ uint8_t bus, devfn;
+
+ if (!(addr >> 63)) {
+ return NULL;
+ }
+ bus = (addr >> 52) & 0xff;
+ devfn = (addr >> 44) & 0xff;
+
+ return pci_find_device(pci->bus, bus, devfn);
+}
+
+/*
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, we have to swap the value.
+ */
+static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off,
+ unsigned size, uint64_t val)
+{
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+
+ pdev = pnv_phb3_find_cfg_dev(phb);
+ if (!pdev) {
+ return;
+ }
+ cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+ cfg_addr |= off;
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /*
+ * conventional pci device can be behind pcie-to-pci bridge.
+ * 256 <= addr < 4K has no effects.
+ */
+ return;
+ }
+ switch (size) {
+ case 1:
+ break;
+ case 2:
+ val = bswap16(val);
+ break;
+ case 4:
+ val = bswap32(val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off,
+ unsigned size)
+{
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+ uint64_t val;
+
+ pdev = pnv_phb3_find_cfg_dev(phb);
+ if (!pdev) {
+ return ~0ull;
+ }
+ cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+ cfg_addr |= off;
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /*
+ * conventional pci device can be behind pcie-to-pci bridge.
+ * 256 <= addr < 4K has no effects.
+ */
+ return ~0ull;
+ }
+ val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
+ switch (size) {
+ case 1:
+ return val;
+ case 2:
+ return bswap16(val);
+ case 4:
+ return bswap32(val);
+ default:
+ g_assert_not_reached();
+ }
+}
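The two helpers above implement the classic indirect configuration cycle: software arms PHB_CONFIG_ADDRESS (valid bit at 63, bus in bits 59:52, devfn in bits 51:44, register number in bits 43:32) and then accesses CONFIG_DATA. As a sketch of the guest-side sequence — mmio_write64() and mmio_read32() are hypothetical accessors, not part of this patch:

    static uint32_t phb3_cfg_read32(uint64_t regs_base, uint8_t bus,
                                    uint8_t devfn, uint16_t reg)
    {
        uint64_t ca = (1ull << 63)                 /* address valid */
                    | ((uint64_t)bus << 52)
                    | ((uint64_t)devfn << 44)
                    | ((uint64_t)(reg & 0xffc) << 32);
        mmio_write64(regs_base + PHB_CONFIG_ADDRESS, ca);
        return mmio_read32(regs_base + PHB_CONFIG_DATA + (reg & 3));
    }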
+
+static void pnv_phb3_check_m32(PnvPHB3 *phb)
+{
+ uint64_t base, start, size;
+ MemoryRegion *parent;
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ if (memory_region_is_mapped(&phb->mr_m32)) {
+ memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32);
+ }
+
+ if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) {
+ return;
+ }
+
+ /* Grab geometry from registers */
+ base = phb->regs[PHB_M32_BASE_ADDR >> 3];
+ start = phb->regs[PHB_M32_START_ADDR >> 3];
+ size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
+
+ /* Check if it matches an enabled MMIO region in the PBCQ */
+ if (memory_region_is_mapped(&pbcq->mmbar0) &&
+ base >= pbcq->mmio0_base &&
+ (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+ parent = &pbcq->mmbar0;
+ base -= pbcq->mmio0_base;
+ } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+ base >= pbcq->mmio1_base &&
+ (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+ parent = &pbcq->mmbar1;
+ base -= pbcq->mmio1_base;
+ } else {
+ return;
+ }
+
+ /* Create alias */
+ memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32",
+ &phb->pci_mmio, start, size);
+ memory_region_add_subregion(parent, base, &phb->mr_m32);
+}
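The size computation above is a two's-complement trick: the top 14 bits of the mask are forced on, so the window size is the complement of the mask register plus one. A worked example with an illustrative value:

    /* With PHB_M32_BASE_MASK = 0xfffffffff0000000ull:
     *   mask | 0xfffc000000000000ull == 0xfffffffff0000000ull
     *   size = ~0xfffffffff0000000ull + 1 = 0x10000000 (a 256MB window)
     */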
+
+static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index)
+{
+ uint64_t base, start, size, m64;
+ MemoryRegion *parent;
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ if (memory_region_is_mapped(&phb->mr_m64[index])) {
+ /* Should we destroy it in RCU friendly way... ? */
+ memory_region_del_subregion(phb->mr_m64[index].container,
+ &phb->mr_m64[index]);
+ }
+
+ /* Get table entry */
+ m64 = phb->ioda_M64BT[index];
+
+ if (!(m64 & IODA2_M64BT_ENABLE)) {
+ return;
+ }
+
+ /* Grab geometry from registers */
+ base = GETFIELD(IODA2_M64BT_BASE, m64) << 20;
+ if (m64 & IODA2_M64BT_SINGLE_PE) {
+ base &= ~0x1ffffffull;
+ }
+ size = GETFIELD(IODA2_M64BT_MASK, m64) << 20;
+ size |= 0xfffc000000000000ull;
+ size = ~size + 1;
+ start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+
+ /* Check if it matches an enabled MMIO region in the PBCQ */
+ if (memory_region_is_mapped(&pbcq->mmbar0) &&
+ base >= pbcq->mmio0_base &&
+ (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+ parent = &pbcq->mmbar0;
+ base -= pbcq->mmio0_base;
+ } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+ base >= pbcq->mmio1_base &&
+ (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+ parent = &pbcq->mmbar1;
+ base -= pbcq->mmio1_base;
+ } else {
+ return;
+ }
+
+ /* Create alias */
+ memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64",
+ &phb->pci_mmio, start, size);
+ memory_region_add_subregion(parent, base, &phb->mr_m64[index]);
+}
+
+static void pnv_phb3_check_all_m64s(PnvPHB3 *phb)
+{
+ uint64_t i;
+
+ for (i = 0; i < PNV_PHB3_NUM_M64; i++) {
+ pnv_phb3_check_m64(phb, i);
+ }
+}
+
+static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val)
+{
+ uint8_t server, prio;
+
+ phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER |
+ IODA2_LXIVT_PRIORITY |
+ IODA2_LXIVT_NODE_ID);
+ server = GETFIELD(IODA2_LXIVT_SERVER, val);
+ prio = GETFIELD(IODA2_LXIVT_PRIORITY, val);
+
+ /*
+ * The low order 2 bits are the link pointer (Type II interrupts).
+ * Shift back to get a valid IRQ server.
+ */
+ server >>= 2;
+
+ ics_write_xive(&phb->lsis, idx, server, prio, prio);
+}
+
+static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb,
+ unsigned *out_table, unsigned *out_idx)
+{
+ uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+ unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+ unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+ unsigned int mask;
+ uint64_t *tptr = NULL;
+
+ switch (table) {
+ case IODA2_TBL_LIST:
+ tptr = phb->ioda_LIST;
+ mask = 7;
+ break;
+ case IODA2_TBL_LXIVT:
+ tptr = phb->ioda_LXIVT;
+ mask = 7;
+ break;
+ case IODA2_TBL_IVC_CAM:
+ case IODA2_TBL_RBA:
+ mask = 31;
+ break;
+ case IODA2_TBL_RCAM:
+ mask = 63;
+ break;
+ case IODA2_TBL_MRT:
+ mask = 7;
+ break;
+ case IODA2_TBL_PESTA:
+ case IODA2_TBL_PESTB:
+ mask = 255;
+ break;
+ case IODA2_TBL_TVT:
+ tptr = phb->ioda_TVT;
+ mask = 511;
+ break;
+ case IODA2_TBL_TCAM:
+ case IODA2_TBL_TDR:
+ mask = 63;
+ break;
+ case IODA2_TBL_M64BT:
+ tptr = phb->ioda_M64BT;
+ mask = 15;
+ break;
+ case IODA2_TBL_M32DT:
+ tptr = phb->ioda_MDT;
+ mask = 255;
+ break;
+ case IODA2_TBL_PEEV:
+ tptr = phb->ioda_PEEV;
+ mask = 3;
+ break;
+ default:
+ phb3_error(phb, "invalid IODA table %d", table);
+ return NULL;
+ }
+ index &= mask;
+ if (out_idx) {
+ *out_idx = index;
+ }
+ if (out_table) {
+ *out_table = table;
+ }
+ if (tptr) {
+ tptr += index;
+ }
+ if (adreg & PHB_IODA_AD_AUTOINC) {
+ index = (index + 1) & mask;
+ adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+ }
+ phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+ return tptr;
+}
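The IODA tables are reached indirectly: software selects a table and starting index via PHB_IODA_ADDR, then accesses PHB_IODA_DATA0 repeatedly, the index auto-incrementing when PHB_IODA_AD_AUTOINC is set. A hedged sketch of how firmware might dump the 16-entry M64BT table (the mmio_* accessors are hypothetical):

    uint64_t ad = PHB_IODA_AD_AUTOINC, ent[16];
    ad = SETFIELD(PHB_IODA_AD_TSEL, ad, IODA2_TBL_M64BT);
    ad = SETFIELD(PHB_IODA_AD_TADR, ad, 0);
    mmio_write64(regs_base + PHB_IODA_ADDR, ad);
    for (int i = 0; i < 16; i++) {
        /* each read returns one entry and bumps the index */
        ent[i] = mmio_read64(regs_base + PHB_IODA_DATA0);
    }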
+
+static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb)
+{
+ unsigned table;
+ uint64_t *tptr;
+
+ tptr = pnv_phb3_ioda_access(phb, &table, NULL);
+ if (!tptr) {
+ /* Return 0 on unsupported tables, not ff's */
+ return 0;
+ }
+ return *tptr;
+}
+
+static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val)
+{
+ unsigned table, idx;
+ uint64_t *tptr;
+
+ tptr = pnv_phb3_ioda_access(phb, &table, &idx);
+ if (!tptr) {
+ return;
+ }
+
+ /* Handle side effects */
+ switch (table) {
+ case IODA2_TBL_LXIVT:
+ pnv_phb3_lxivt_write(phb, idx, val);
+ break;
+ case IODA2_TBL_M64BT:
+ *tptr = val;
+ pnv_phb3_check_m64(phb, idx);
+ break;
+ default:
+ *tptr = val;
+ }
+}
+
+/*
+ * This is called whenever the PHB LSI, MSI source ID register or
+ * the PBCQ irq filters are written.
+ */
+void pnv_phb3_remap_irqs(PnvPHB3 *phb)
+{
+ ICSState *ics = &phb->lsis;
+ uint32_t local, global, count, mask, comp;
+ uint64_t baren;
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ /*
+ * First check if we are enabled. Unlike real HW we don't separate
+ * TX and RX so we enable if both are set
+ */
+ baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+ if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
+ !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
+ ics->offset = 0;
+ return;
+ }
+
+ /* Grab local LSI source ID */
+ local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
+
+ /* Grab global one and compare */
+ global = GETFIELD(PBCQ_NEST_LSI_SRC,
+ pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
+ if (global != local) {
+ /*
+ * This happens during initialization, let's come back when we
+ * are properly configured
+ */
+ ics->offset = 0;
+ return;
+ }
+
+ /* Get the base on the powerbus */
+ comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
+ pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
+ mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
+ pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
+ count = ((~mask) + 1) & 0x7ffff;
+ phb->total_irq = count;
+
+ /* Sanity checks */
+ if ((global + PNV_PHB3_NUM_LSI) > count) {
+ phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global,
+ count);
+ }
+
+ if (count > 2048) {
+ phb3_error(phb, "More interrupts than supported: %d", count);
+ }
+
+ if ((comp & mask) != comp) {
+ phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
+ comp, mask);
+ comp &= mask;
+ }
+ /* Setup LSI offset */
+ ics->offset = comp + global;
+
+ /* Setup MSI offset */
+ pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI);
+}
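The compare/mask pair defines a power-of-two block of interrupt source numbers on the PowerBus, so count is the two's complement of the mask within the 19-bit IRSN field. Worked example with an illustrative mask:

    /* mask = 0x7ff00 selects a 256-entry block:
     *   count = ((~0x7ff00) + 1) & 0x7ffff = 0x100 = 256 interrupts,
     * all source numbers satisfying (irsn & 0x7ff00) == comp.
     */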
+
+static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val)
+{
+ /* Sanitize content */
+ val &= PHB_LSI_SRC_ID;
+ phb->regs[PHB_LSI_SOURCE_ID >> 3] = val;
+ pnv_phb3_remap_irqs(phb);
+}
+
+static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val)
+{
+ PnvPhb3DMASpace *ds;
+
+ /* Always invalidate all for now ... */
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ ds->pe_num = PHB_INVALID_PE;
+ }
+}
+
+
+static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
+{
+ uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+
+ if (cfg & PHB_PHB3C_32BIT_MSI_EN) {
+ if (!memory_region_is_mapped(&ds->msi32_mr)) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ 0xffff0000, &ds->msi32_mr);
+ }
+ } else {
+ if (memory_region_is_mapped(&ds->msi32_mr)) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi32_mr);
+ }
+ }
+
+ if (cfg & PHB_PHB3C_64BIT_MSI_EN) {
+ if (!memory_region_is_mapped(&ds->msi64_mr)) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ (1ull << 60), &ds->msi64_mr);
+ }
+ } else {
+ if (memory_region_is_mapped(&ds->msi64_mr)) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi64_mr);
+ }
+ }
+}
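When enabled, the 32-bit MSI window is a 64KB region at 0xffff0000 in the device's DMA space and the 64-bit window sits at 1 << 60; DMA writes landing there are treated as interrupt messages rather than translated. The 32-bit address match used later in the translate path amounts to (a sketch, not a new API):

    static bool phb3_addr_is_msi32(uint64_t addr)
    {
        return (addr & 0xffffffffffff0000ull) == 0xffff0000ull;
    }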
+
+static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb)
+{
+ PnvPhb3DMASpace *ds;
+
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ pnv_phb3_update_msi_regions(ds);
+ }
+}
+
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
+{
+ PnvPHB3 *phb = opaque;
+ bool changed;
+
+ /* Special case configuration data */
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+ pnv_phb3_config_write(phb, off & 0x3, size, val);
+ return;
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return;
+ }
+
+ /* Handle masking & filtering */
+ switch (off) {
+ case PHB_M64_UPPER_BITS:
+ val &= 0xfffc000000000000ull;
+ break;
+ case PHB_Q_DMA_R:
+ /*
+ * This is enough logic to make SW happy but we aren't actually
+ * quiescing the DMAs
+ */
+ if (val & PHB_Q_DMA_R_AUTORESET) {
+ val = 0;
+ } else {
+ val &= PHB_Q_DMA_R_QUIESCE_DMA;
+ }
+ break;
+ /* LEM stuff */
+ case PHB_LEM_FIR_AND_MASK:
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+ return;
+ case PHB_LEM_FIR_OR_MASK:
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+ return;
+ case PHB_LEM_ERROR_AND_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+ return;
+ case PHB_LEM_ERROR_OR_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+ return;
+ case PHB_LEM_WOF:
+ val = 0;
+ break;
+ }
+
+ /* Record whether it changed */
+ changed = phb->regs[off >> 3] != val;
+
+ /* Store in register cache first */
+ phb->regs[off >> 3] = val;
+
+ /* Handle side effects */
+ switch (off) {
+ case PHB_PHB3_CONFIG:
+ if (changed) {
+ pnv_phb3_update_all_msi_regions(phb);
+ }
+ /* fall through */
+ case PHB_M32_BASE_ADDR:
+ case PHB_M32_BASE_MASK:
+ case PHB_M32_START_ADDR:
+ if (changed) {
+ pnv_phb3_check_m32(phb);
+ }
+ break;
+ case PHB_M64_UPPER_BITS:
+ if (changed) {
+ pnv_phb3_check_all_m64s(phb);
+ }
+ break;
+ case PHB_LSI_SOURCE_ID:
+ if (changed) {
+ pnv_phb3_lsi_src_id_write(phb, val);
+ }
+ break;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ pnv_phb3_ioda_write(phb, val);
+ break;
+
+ /* RTC invalidation */
+ case PHB_RTC_INVALIDATE:
+ pnv_phb3_rtc_invalidate(phb, val);
+ break;
+
+ /* FFI request */
+ case PHB_FFI_REQUEST:
+ pnv_phb3_msi_ffi(&phb->msis, val);
+ break;
+
+ /* Silent simple writes */
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_RBA_BAR:
+ case PHB_IVT_BAR:
+ case PHB_FFI_LOCK:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+ off, val);
+ }
+}
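The LEM AND/OR mask registers give software race-free read-modify-write of the FIR accumulator and error mask: an AND-mask write clears the zero bits of the value, an OR-mask write sets the one bits. For instance, firmware could clear a single FIR bit like this (mmio_write64() hypothetical; PPC_BIT numbers bits from the MSB):

    /* clear FIR bit 42, leaving all other bits untouched */
    mmio_write64(regs_base + PHB_LEM_FIR_AND_MASK, ~PPC_BIT(42));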
+
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+ PnvPHB3 *phb = opaque;
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ uint64_t val;
+
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+ return pnv_phb3_config_read(phb, off & 0x3, size);
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return ~0ull;
+ }
+
+ /* Default read from cache */
+ val = phb->regs[off >> 3];
+
+ switch (off) {
+ /* Simulate venice DD2.0 */
+ case PHB_VERSION:
+ return 0x000000a300000005ull;
+ case PHB_PCIE_SYSTEM_CONFIG:
+ return 0x441100fc30000000ull;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ return pnv_phb3_ioda_read(phb);
+
+ /* Link training always appears trained */
+ case PHB_PCIE_DLP_TRAIN_CTL:
+ if (!pci_find_device(pci->bus, 1, 0)) {
+ return 0;
+ }
+ return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT;
+
+ /* FFI Lock */
+ case PHB_FFI_LOCK:
+ /* Set lock and return previous value */
+ phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE;
+ return val;
+
+ /* DMA read sync: make it look like it's complete */
+ case PHB_DMARD_SYNC:
+ return PHB_DMARD_SYNC_COMPLETE;
+
+ /* Silent simple reads */
+ case PHB_PHB3_CONFIG:
+ case PHB_M32_BASE_ADDR:
+ case PHB_M32_BASE_MASK:
+ case PHB_M32_START_ADDR:
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_RTC_INVALIDATE:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_RBA_BAR:
+ case PHB_IVT_BAR:
+ case PHB_M64_UPPER_BITS:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+ off, val);
+ }
+ return val;
+}
+
+static const MemoryRegionOps pnv_phb3_reg_ops = {
+ .read = pnv_phb3_reg_read,
+ .write = pnv_phb3_reg_write,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+ /* Check that out properly ... */
+ return irq_num & 3;
+}
+
+static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
+{
+ PnvPHB3 *phb = opaque;
+
+ /* LSI only ... */
+ if (irq_num > 3) {
+ phb3_error(phb, "Unknown IRQ to set %d", irq_num);
+ }
+ qemu_set_irq(phb->qirqs[irq_num], level);
+}
+
+static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
+{
+ uint64_t rtt, addr;
+ uint16_t rte;
+ int bus_num;
+
+ /* Already resolved ? */
+ if (ds->pe_num != PHB_INVALID_PE) {
+ return true;
+ }
+
+ /* We need to lookup the RTT */
+ rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+ if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+ phb3_error(ds->phb, "DMA with RTT BAR disabled !");
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+
+ /* Read RTE */
+ bus_num = pci_bus_num(ds->bus);
+ addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+ addr += 2 * ((bus_num << 8) | ds->devfn);
+ if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+ phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+ rte = be16_to_cpu(rte);
+
+ /* Fail upon reading of invalid PE# */
+ if (rte >= PNV_PHB3_NUM_PE) {
+ phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+ ds->pe_num = rte;
+ return true;
+}
+
+static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
+ bool is_write, uint64_t tve,
+ IOMMUTLBEntry *tlb)
+{
+ uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
+ int32_t lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
+ uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
+ uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
+ PnvPHB3 *phb = ds->phb;
+
+ /* Invalid levels */
+ if (lev > 4) {
+ phb3_error(phb, "Invalid #levels in TVE %d", lev);
+ return;
+ }
+
+ /* IO Page Size of 0 means untranslated, else use TCEs */
+ if (tps == 0) {
+ /*
+ * We only support non-translate in top window.
+ *
+ * TODO: Venice/Murano support it on bottom window above 4G and
+ * Naples supports it on everything
+ */
+ if (!(tve & PPC_BIT(51))) {
+ phb3_error(phb, "xlate for invalid non-translate TVE");
+ return;
+ }
+ /* TODO: Handle boundaries */
+
+ /* Use 4k pages like q35 ... for now */
+ tlb->iova = addr & 0xfffffffffffff000ull;
+ tlb->translated_addr = addr & 0x0003fffffffff000ull;
+ tlb->addr_mask = 0xfffull;
+ tlb->perm = IOMMU_RW;
+ } else {
+ uint32_t tce_shift, tbl_shift, sh;
+ uint64_t base, taddr, tce, tce_mask;
+
+ /* TVE disabled ? */
+ if (tts == 0) {
+ phb3_error(phb, "xlate for invalid translated TVE");
+ return;
+ }
+
+ /* Address bits per bottom level TCE entry */
+ tce_shift = tps + 11;
+
+ /* Address bits per table level */
+ tbl_shift = tts + 8;
+
+ /* Top level table base address */
+ base = tta << 12;
+
+ /* Total shift to first level */
+ sh = tbl_shift * lev + tce_shift;
+
+ /* TODO: Multi-level untested */
+ while ((lev--) >= 0) {
+ /* Grab the TCE address */
+ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
+ if (dma_memory_read(&address_space_memory, taddr, &tce,
+ sizeof(tce))) {
+ phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+ return;
+ }
+ tce = be64_to_cpu(tce);
+
+ /* Check permission for indirect TCE */
+ if ((lev >= 0) && !(tce & 3)) {
+ phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+ phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ return;
+ }
+ sh -= tbl_shift;
+ base = tce & ~0xfffull;
+ }
+
+ /* We exit the loop with TCE being the final TCE */
+ tce_mask = ~((1ull << tce_shift) - 1);
+ tlb->iova = addr & tce_mask;
+ tlb->translated_addr = tce & tce_mask;
+ tlb->addr_mask = ~tce_mask;
+ tlb->perm = tce & 3;
+ if ((is_write && !(tce & 2)) || ((!is_write) && !(tce & 1))) {
+ phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr);
+ phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ }
+ }
+}
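The shift arithmetic encodes the IODA2 TVE geometry: a bottom-level TCE covers 1 << (tps + 11) bytes and each table level indexes 1 << (tts + 8) entries. Worked example with illustrative field values:

    /* tps = 1 -> tce_shift = 12, i.e. 4K IO pages
     * tts = 1 -> tbl_shift = 9,  i.e. 512 entries per level
     * lev = 1 -> two table walks; sh starts at 9 * 1 + 12 = 21,
     * so each top-level entry maps 2MB and the whole TVE covers
     * 512 * 2MB = 1GB of DMA space.
     */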
+
+static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu,
+ hwaddr addr,
+ IOMMUAccessFlags flag,
+ int iommu_idx)
+{
+ PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
+ int tve_sel;
+ uint64_t tve, cfg;
+ IOMMUTLBEntry ret = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = ~(hwaddr)0,
+ .perm = IOMMU_NONE,
+ };
+ PnvPHB3 *phb = ds->phb;
+
+ /* Resolve PE# */
+ if (!pnv_phb3_resolve_pe(ds)) {
+ phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return ret;
+ }
+
+ /* Check top bits */
+ switch (addr >> 60) {
+ case 0:
+ /* DMA or 32-bit MSI ? */
+ cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+ if ((cfg & PHB_PHB3C_32BIT_MSI_EN) &&
+ ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+ phb3_error(phb, "xlate on 32-bit MSI region");
+ return ret;
+ }
+ /* Choose TVE XXX Use PHB3 Control Register */
+ tve_sel = (addr >> 59) & 1;
+ tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+ pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
+ break;
+ case 1:
+ phb3_error(phb, "xlate on 64-bit MSI region");
+ break;
+ default:
+ phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr);
+ }
+ return ret;
+}
+
+#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region"
+#define PNV_PHB3_IOMMU_MEMORY_REGION(obj) \
+ OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION)
+
+static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass,
+ void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = pnv_phb3_translate_iommu;
+}
+
+static const TypeInfo pnv_phb3_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+ .class_init = pnv_phb3_iommu_memory_region_class_init,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ */
+static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ PnvPhb3DMASpace *ds = opaque;
+
+ /* Resolve PE# */
+ if (!pnv_phb3_resolve_pe(ds)) {
+ phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return;
+ }
+
+ pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num);
+}
+
+/* There is no .read as the read result is undefined by PCI spec */
+static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+ PnvPhb3DMASpace *ds = opaque;
+
+ phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr);
+ return -1;
+}
+
+static const MemoryRegionOps pnv_phb3_msi_ops = {
+ .read = pnv_phb3_msi_read,
+ .write = pnv_phb3_msi_write,
+ .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+ PnvPHB3 *phb = opaque;
+ PnvPhb3DMASpace *ds;
+
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ if (ds->bus == bus && ds->devfn == devfn) {
+ break;
+ }
+ }
+
+ if (ds == NULL) {
+ ds = g_malloc0(sizeof(PnvPhb3DMASpace));
+ ds->bus = bus;
+ ds->devfn = devfn;
+ ds->pe_num = PHB_INVALID_PE;
+ ds->phb = phb;
+ memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+ TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+ OBJECT(phb), "phb3_iommu", UINT64_MAX);
+ address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
+ "phb3_iommu");
+ memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+ ds, "msi32", 0x10000);
+ memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+ ds, "msi64", 0x100000);
+ pnv_phb3_update_msi_regions(ds);
+
+ QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+ }
+ return &ds->dma_as;
+}
+
+static void pnv_phb3_instance_init(Object *obj)
+{
+ PnvPHB3 *phb = PNV_PHB3(obj);
+
+ QLIST_INIT(&phb->dma_spaces);
+
+ /* LSI sources */
+ object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis),
+ TYPE_ICS, &error_abort, NULL);
+
+ /* Default init ... will be fixed by HW inits */
+ phb->lsis.offset = 0;
+
+ /* MSI sources */
+ object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis),
+ TYPE_PHB3_MSI, &error_abort, NULL);
+
+ /* Power Bus Common Queue */
+ object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq),
+ TYPE_PNV_PBCQ, &error_abort, NULL);
+
+ /* Root Port */
+ object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+ TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL);
+ qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
+ qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
+}
+
+static void pnv_phb3_realize(DeviceState *dev, Error **errp)
+{
+ PnvPHB3 *phb = PNV_PHB3(dev);
+ PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+ PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+ Error *local_err = NULL;
+ int i;
+
+ if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) {
+ error_setg(errp, "invalid PHB index: %d", phb->phb_id);
+ return;
+ }
+
+ /* LSI sources */
+ object_property_set_link(OBJECT(&phb->lsis), OBJECT(pnv), "xics",
+ &error_abort);
+ object_property_set_int(OBJECT(&phb->lsis), PNV_PHB3_NUM_LSI, "nr-irqs",
+ &error_abort);
+ object_property_set_bool(OBJECT(&phb->lsis), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ for (i = 0; i < phb->lsis.nr_irqs; i++) {
+ ics_set_irq_type(&phb->lsis, i, true);
+ }
+
+ phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs);
+
+ /* MSI sources */
+ object_property_set_link(OBJECT(&phb->msis), OBJECT(phb), "phb",
+ &error_abort);
+ object_property_set_link(OBJECT(&phb->msis), OBJECT(pnv), "xics",
+ &error_abort);
+ object_property_set_int(OBJECT(&phb->msis), PHB3_MAX_MSI, "nr-irqs",
+ &error_abort);
+ object_property_set_bool(OBJECT(&phb->msis), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Power Bus Common Queue */
+ object_property_set_link(OBJECT(&phb->pbcq), OBJECT(phb), "phb",
+ &error_abort);
+ object_property_set_bool(OBJECT(&phb->pbcq), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Controller Registers */
+ memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
+ "phb3-regs", 0x1000);
+
+ /*
+ * PHB3 doesn't support IO space. However, qemu gets very upset if
+ * we don't have an IO region to anchor IO BARs onto so we just
+ * initialize one which we never hook up to anything
+ */
+ memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
+ memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
+ PCI_MMIO_TOTAL_SIZE);
+
+ pci->bus = pci_register_root_bus(dev, "root-bus",
+ pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
+ &phb->pci_mmio, &phb->pci_io,
+ 0, 4, TYPE_PNV_PHB3_ROOT_BUS);
+
+ pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+
+ /* Add a single Root port */
+ qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
+ qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
+ qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
+ qdev_init_nofail(DEVICE(&phb->root));
+}
+
+void pnv_phb3_update_regions(PnvPHB3 *phb)
+{
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ /* Unmap first always */
+ if (memory_region_is_mapped(&phb->mr_regs)) {
+ memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs);
+ }
+
+ /* Map registers if enabled */
+ if (memory_region_is_mapped(&pbcq->phbbar)) {
+ /* TODO: We should use the PHB BAR 2 register but we don't ... */
+ memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs);
+ }
+
+ /* Check/update m32 */
+ if (memory_region_is_mapped(&phb->mr_m32)) {
+ pnv_phb3_check_m32(phb);
+ }
+ pnv_phb3_check_all_m64s(phb);
+}
+
+static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
+ PCIBus *rootbus)
+{
+ PnvPHB3 *phb = PNV_PHB3(host_bridge);
+
+ snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
+ phb->chip_id, phb->phb_id);
+ return phb->bus_path;
+}
+
+static Property pnv_phb3_properties[] = {
+ DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
+ DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_phb3_class_init(ObjectClass *klass, void *data)
+{
+ PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ hc->root_bus_path = pnv_phb3_root_bus_path;
+ dc->realize = pnv_phb3_realize;
+ device_class_set_props(dc, pnv_phb3_properties);
+ set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+ dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_phb3_type_info = {
+ .name = TYPE_PNV_PHB3,
+ .parent = TYPE_PCIE_HOST_BRIDGE,
+ .instance_size = sizeof(PnvPHB3),
+ .class_init = pnv_phb3_class_init,
+ .instance_init = pnv_phb3_instance_init,
+};
+
+static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
+{
+ BusClass *k = BUS_CLASS(klass);
+
+ /*
+ * PHB3 has only a single root complex. Enforce the limit on the
+ * parent bus
+ */
+ k->max_dev = 1;
+}
+
+static const TypeInfo pnv_phb3_root_bus_info = {
+ .name = TYPE_PNV_PHB3_ROOT_BUS,
+ .parent = TYPE_PCIE_BUS,
+ .class_init = pnv_phb3_root_bus_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { }
+ },
+};
+
+static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)
+{
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+ Error *local_err = NULL;
+
+ rpc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
+static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+ dc->desc = "IBM PHB3 PCIE Root Port";
+
+ device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
+ &rpc->parent_realize);
+ dc->user_creatable = false;
+
+ k->vendor_id = PCI_VENDOR_ID_IBM;
+ k->device_id = 0x03dc;
+ k->revision = 0;
+
+ rpc->exp_offset = 0x48;
+ rpc->aer_offset = 0x100;
+}
+
+static const TypeInfo pnv_phb3_root_port_info = {
+ .name = TYPE_PNV_PHB3_ROOT_PORT,
+ .parent = TYPE_PCIE_ROOT_PORT,
+ .instance_size = sizeof(PnvPHB3RootPort),
+ .class_init = pnv_phb3_root_port_class_init,
+};
+
+static void pnv_phb3_register_types(void)
+{
+ type_register_static(&pnv_phb3_root_bus_info);
+ type_register_static(&pnv_phb3_root_port_info);
+ type_register_static(&pnv_phb3_type_info);
+ type_register_static(&pnv_phb3_iommu_memory_region_info);
+}
+
+type_init(pnv_phb3_register_types)
diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
new file mode 100644
index 0000000..ecfc1b2
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -0,0 +1,349 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/pci/msi.h"
+#include "monitor/monitor.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/reset.h"
+
+static uint64_t phb3_msi_ive_addr(PnvPHB3 *phb, int srcno)
+{
+ uint64_t ivtbar = phb->regs[PHB_IVT_BAR >> 3];
+ uint64_t phbctl = phb->regs[PHB_CONTROL >> 3];
+
+ if (!(ivtbar & PHB_IVT_BAR_ENABLE)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Failed access to disable IVT BAR !");
+ return 0;
+ }
+
+ if (srcno >= (ivtbar & PHB_IVT_LENGTH_MASK)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "MSI out of bounds (%d vs 0x%"PRIx64")",
+ srcno, (uint64_t) (ivtbar & PHB_IVT_LENGTH_MASK));
+ return 0;
+ }
+
+ ivtbar &= PHB_IVT_BASE_ADDRESS_MASK;
+
+ if (phbctl & PHB_CTRL_IVE_128_BYTES) {
+ return ivtbar + 128 * srcno;
+ } else {
+ return ivtbar + 16 * srcno;
+ }
+}
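Each MSI source thus has an Interrupt Vector Entry in guest memory, with a 16-byte stride by default or 128 bytes when PHB_CTRL_IVE_128_BYTES gives every IVE its own cache line. For illustration:

    /* With IVT base B and 16-byte IVEs, source 37 lives at B + 16 * 37;
     * with 128-byte IVEs, at B + 128 * 37. The P and Q bytes updated by
     * the helpers below sit at offsets +4 and +5 of the entry.
     */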
+
+static bool phb3_msi_read_ive(PnvPHB3 *phb, int srcno, uint64_t *out_ive)
+{
+ uint64_t ive_addr, ive;
+
+ ive_addr = phb3_msi_ive_addr(phb, srcno);
+ if (!ive_addr) {
+ return false;
+ }
+
+ if (dma_memory_read(&address_space_memory, ive_addr, &ive, sizeof(ive))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Failed to read IVE at 0x%" PRIx64,
+ ive_addr);
+ return false;
+ }
+ *out_ive = be64_to_cpu(ive);
+
+ return true;
+}
+
+static void phb3_msi_set_p(Phb3MsiState *msi, int srcno, uint8_t gen)
+{
+ uint64_t ive_addr;
+ uint8_t p = 0x01 | (gen << 1);
+
+ ive_addr = phb3_msi_ive_addr(msi->phb, srcno);
+ if (!ive_addr) {
+ return;
+ }
+
+ if (dma_memory_write(&address_space_memory, ive_addr + 4, &p, 1)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Failed to write IVE (set P) at 0x%" PRIx64, ive_addr);
+ }
+}
+
+static void phb3_msi_set_q(Phb3MsiState *msi, int srcno)
+{
+ uint64_t ive_addr;
+ uint8_t q = 0x01;
+
+ ive_addr = phb3_msi_ive_addr(msi->phb, srcno);
+ if (!ive_addr) {
+ return;
+ }
+
+ if (dma_memory_write(&address_space_memory, ive_addr + 5, &q, 1)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Failed to write IVE (set Q) at 0x%" PRIx64, ive_addr);
+ }
+}
+
+static void phb3_msi_try_send(Phb3MsiState *msi, int srcno, bool force)
+{
+ ICSState *ics = ICS(msi);
+ uint64_t ive;
+ uint64_t server, prio, pq, gen;
+
+ if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) {
+ return;
+ }
+
+ server = GETFIELD(IODA2_IVT_SERVER, ive);
+ prio = GETFIELD(IODA2_IVT_PRIORITY, ive);
+ if (!force) {
+ pq = GETFIELD(IODA2_IVT_Q, ive) | (GETFIELD(IODA2_IVT_P, ive) << 1);
+ } else {
+ pq = 0;
+ }
+ gen = GETFIELD(IODA2_IVT_GEN, ive);
+
+ /*
+ * The low order 2 bits are the link pointer (Type II interrupts).
+ * Shift back to get a valid IRQ server.
+ */
+ server >>= 2;
+
+ switch (pq) {
+ case 0: /* 00 */
+ if (prio == 0xff) {
+ /* Masked, set Q */
+ phb3_msi_set_q(msi, srcno);
+ } else {
+ /* Enabled, set P and send */
+ phb3_msi_set_p(msi, srcno, gen);
+ icp_irq(ics, server, srcno + ics->offset, prio);
+ }
+ break;
+ case 2: /* 10 */
+ /* Already pending, set Q */
+ phb3_msi_set_q(msi, srcno);
+ break;
+ case 1: /* 01 */
+ case 3: /* 11 */
+ default:
+ /* Just drop stuff if Q already set */
+ break;
+ }
+}
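The P/Q bits implement the usual XICS-style coalescing state machine (P = presented/in flight, Q = queued). Summarized, with PQ read as the two-bit value built above:

    /* PQ = 00: idle            -> set P and deliver (set Q if masked)
     * PQ = 10: P in flight     -> set Q to remember a further interrupt
     * PQ = 01, 11: Q already set -> drop, the resend path covers it
     */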
+
+static void phb3_msi_set_irq(void *opaque, int srcno, int val)
+{
+ Phb3MsiState *msi = PHB3_MSI(opaque);
+
+ if (val) {
+ phb3_msi_try_send(msi, srcno, false);
+ }
+}
+
+
+void pnv_phb3_msi_send(Phb3MsiState *msi, uint64_t addr, uint16_t data,
+ int32_t dev_pe)
+{
+ ICSState *ics = ICS(msi);
+ uint64_t ive;
+ uint16_t pe;
+ uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
+
+ if (src >= ics->nr_irqs) {
+ qemu_log_mask(LOG_GUEST_ERROR, "MSI %d out of bounds", src);
+ return;
+ }
+ if (dev_pe >= 0) {
+ if (!phb3_msi_read_ive(msi->phb, src, &ive)) {
+ return;
+ }
+ pe = GETFIELD(IODA2_IVT_PE, ive);
+ if (pe != dev_pe) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "MSI %d send by PE#%d but assigned to PE#%d",
+ src, dev_pe, pe);
+ return;
+ }
+ }
+ qemu_irq_pulse(msi->qirqs[src]);
+}
+
+void pnv_phb3_msi_ffi(Phb3MsiState *msi, uint64_t val)
+{
+ /* Emit interrupt */
+ pnv_phb3_msi_send(msi, val, 0, -1);
+
+ /* Clear FFI lock */
+ msi->phb->regs[PHB_FFI_LOCK >> 3] = 0;
+}
+
+static void phb3_msi_reject(ICSState *ics, uint32_t nr)
+{
+ Phb3MsiState *msi = PHB3_MSI(ics);
+ unsigned int srcno = nr - ics->offset;
+ unsigned int idx = srcno >> 6;
+ uint64_t bit = 1ull << (srcno & 0x3f);
+
+ assert(srcno < PHB3_MAX_MSI);
+
+ msi->rba[idx] |= bit;
+ msi->rba_sum |= (1u << idx);
+}
+
+static void phb3_msi_resend(ICSState *ics)
+{
+ Phb3MsiState *msi = PHB3_MSI(ics);
+ unsigned int i, j;
+
+ if (msi->rba_sum == 0) {
+ return;
+ }
+
+ for (i = 0; i < 32; i++) {
+ if ((msi->rba_sum & (1u << i)) == 0) {
+ continue;
+ }
+ msi->rba_sum &= ~(1u << i);
+ for (j = 0; j < 64; j++) {
+ if ((msi->rba[i] & (1ull << j)) == 0) {
+ continue;
+ }
+ msi->rba[i] &= ~(1ull << j);
+ phb3_msi_try_send(msi, i * 64 + j, true);
+ }
+ }
+}
+
+static void phb3_msi_reset(DeviceState *dev)
+{
+ Phb3MsiState *msi = PHB3_MSI(dev);
+ ICSStateClass *icsc = ICS_GET_CLASS(dev);
+
+ icsc->parent_reset(dev);
+
+ memset(msi->rba, 0, sizeof(msi->rba));
+ msi->rba_sum = 0;
+}
+
+static void phb3_msi_reset_handler(void *dev)
+{
+ phb3_msi_reset(dev);
+}
+
+void pnv_phb3_msi_update_config(Phb3MsiState *msi, uint32_t base,
+ uint32_t count)
+{
+ ICSState *ics = ICS(msi);
+
+ if (count > PHB3_MAX_MSI) {
+ count = PHB3_MAX_MSI;
+ }
+ ics->nr_irqs = count;
+ ics->offset = base;
+}
+
+static void phb3_msi_realize(DeviceState *dev, Error **errp)
+{
+ Phb3MsiState *msi = PHB3_MSI(dev);
+ ICSState *ics = ICS(msi);
+ ICSStateClass *icsc = ICS_GET_CLASS(ics);
+ Error *local_err = NULL;
+
+ assert(msi->phb);
+
+ icsc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ msi->qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msi, ics->nr_irqs);
+
+ qemu_register_reset(phb3_msi_reset_handler, dev);
+}
+
+static void phb3_msi_instance_init(Object *obj)
+{
+ Phb3MsiState *msi = PHB3_MSI(obj);
+ ICSState *ics = ICS(obj);
+
+ object_property_add_link(obj, "phb", TYPE_PNV_PHB3,
+ (Object **)&msi->phb,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_STRONG,
+ &error_abort);
+
+ /* Will be overridden later */
+ ics->offset = 0;
+}
+
+static void phb3_msi_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ICSStateClass *isc = ICS_CLASS(klass);
+
+ device_class_set_parent_realize(dc, phb3_msi_realize,
+ &isc->parent_realize);
+ device_class_set_parent_reset(dc, phb3_msi_reset,
+ &isc->parent_reset);
+
+ isc->reject = phb3_msi_reject;
+ isc->resend = phb3_msi_resend;
+}
+
+static const TypeInfo phb3_msi_info = {
+ .name = TYPE_PHB3_MSI,
+ .parent = TYPE_ICS,
+ .instance_size = sizeof(Phb3MsiState),
+ .class_init = phb3_msi_class_init,
+ .class_size = sizeof(ICSStateClass),
+ .instance_init = phb3_msi_instance_init,
+};
+
+static void pnv_phb3_msi_register_types(void)
+{
+ type_register_static(&phb3_msi_info);
+}
+
+type_init(pnv_phb3_msi_register_types)
+
+void pnv_phb3_msi_pic_print_info(Phb3MsiState *msi, Monitor *mon)
+{
+ ICSState *ics = ICS(msi);
+ int i;
+
+ monitor_printf(mon, "ICS %4x..%4x %p\n",
+ ics->offset, ics->offset + ics->nr_irqs - 1, ics);
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ uint64_t ive;
+
+ if (!phb3_msi_read_ive(msi->phb, i, &ive)) {
+ return;
+ }
+
+ if (GETFIELD(IODA2_IVT_PRIORITY, ive) == 0xff) {
+ continue;
+ }
+
+ monitor_printf(mon, " %4x %c%c server=%04x prio=%02x gen=%d\n",
+ ics->offset + i,
+ GETFIELD(IODA2_IVT_P, ive) ? 'P' : '-',
+ GETFIELD(IODA2_IVT_Q, ive) ? 'Q' : '-',
+ (uint32_t) GETFIELD(IODA2_IVT_SERVER, ive) >> 2,
+ (uint32_t) GETFIELD(IODA2_IVT_PRIORITY, ive),
+ (uint32_t) GETFIELD(IODA2_IVT_GEN, ive));
+ }
+}
diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c
new file mode 100644
index 0000000..f232228
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_pbcq.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+
+#include <libfdt.h>
+
+#define phb3_pbcq_error(pbcq, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb3_pbcq[%d:%d]: " fmt "\n", \
+ (pbcq)->phb->chip_id, (pbcq)->phb->phb_id, ## __VA_ARGS__)
+
+static uint64_t pnv_pbcq_nest_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t offset = addr >> 3;
+
+ return pbcq->nest_regs[offset];
+}
+
+static uint64_t pnv_pbcq_pci_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t offset = addr >> 3;
+
+ return pbcq->pci_regs[offset];
+}
+
+static uint64_t pnv_pbcq_spci_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t offset = addr >> 3;
+
+ if (offset == PBCQ_SPCI_ASB_DATA) {
+ return pnv_phb3_reg_read(pbcq->phb,
+ pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], 8);
+ }
+ return pbcq->spci_regs[offset];
+}
+
+static void pnv_pbcq_update_map(PnvPBCQState *pbcq)
+{
+ uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+ uint64_t bar, mask, size;
+
+ /*
+ * NOTE: This will really not work well if those are remapped
+ * after the PHB has created its sub regions. We could do better
+ * if we had a way to resize regions but we don't really care
+ * that much in practice as the stuff below really only happens
+ * once early during boot
+ */
+
+ /* Handle unmaps */
+ if (memory_region_is_mapped(&pbcq->mmbar0) &&
+ !(bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+ memory_region_del_subregion(get_system_memory(), &pbcq->mmbar0);
+ }
+ if (memory_region_is_mapped(&pbcq->mmbar1) &&
+ !(bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+ memory_region_del_subregion(get_system_memory(), &pbcq->mmbar1);
+ }
+ if (memory_region_is_mapped(&pbcq->phbbar) &&
+ !(bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+ memory_region_del_subregion(get_system_memory(), &pbcq->phbbar);
+ }
+
+ /* Update PHB */
+ pnv_phb3_update_regions(pbcq->phb);
+
+ /* Handle maps */
+ if (!memory_region_is_mapped(&pbcq->mmbar0) &&
+ (bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+ bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14;
+ mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0];
+ size = ((~mask) >> 14) + 1;
+ memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", size);
+ memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar0);
+ pbcq->mmio0_base = bar;
+ pbcq->mmio0_size = size;
+ }
+ if (!memory_region_is_mapped(&pbcq->mmbar1) &&
+ (bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+ bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14;
+ mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1];
+ size = ((~mask) >> 14) + 1;
+ memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", size);
+ memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar1);
+ pbcq->mmio1_base = bar;
+ pbcq->mmio1_size = size;
+ }
+ if (!memory_region_is_mapped(&pbcq->phbbar)
+ && (bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+ bar = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14;
+ size = 0x1000;
+ memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", size);
+ memory_region_add_subregion(get_system_memory(), bar, &pbcq->phbbar);
+ }
+
+ /* Update PHB */
+ pnv_phb3_update_regions(pbcq->phb);
+}
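The nest BAR registers hold the window address shifted left by 14, so decoding shifts right by 14 and the size is again the two's complement of the mask. With the defaults programmed by pnv_pbcq_default_bars() below (chip 0, PHB 0):

    /* nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14
     *                                 = 0xfffc000000000000ull
     * size = ((~0xfffc000000000000ull) >> 14) + 1 = 1ull << 36 (64GB)
     * bar  = nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14 = 0x3d00000000000ull
     */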
+
+static void pnv_pbcq_nest_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PBCQ_NEST_MMIO_BAR0:
+ case PBCQ_NEST_MMIO_BAR1:
+ case PBCQ_NEST_MMIO_MASK0:
+ case PBCQ_NEST_MMIO_MASK1:
+ if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] &
+ (PBCQ_NEST_BAR_EN_MMIO0 |
+ PBCQ_NEST_BAR_EN_MMIO1)) {
+ phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+ }
+ pbcq->nest_regs[reg] = val & 0xffffffffc0000000ull;
+ break;
+ case PBCQ_NEST_PHB_BAR:
+ if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & PBCQ_NEST_BAR_EN_PHB) {
+ phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+ }
+ pbcq->nest_regs[reg] = val & 0xfffffffffc000000ull;
+ break;
+ case PBCQ_NEST_BAR_EN:
+ pbcq->nest_regs[reg] = val & 0xf800000000000000ull;
+ pnv_pbcq_update_map(pbcq);
+ pnv_phb3_remap_irqs(pbcq->phb);
+ break;
+ case PBCQ_NEST_IRSN_COMPARE:
+ case PBCQ_NEST_IRSN_MASK:
+ pbcq->nest_regs[reg] = val & PBCQ_NEST_IRSN_COMP;
+ pnv_phb3_remap_irqs(pbcq->phb);
+ break;
+ case PBCQ_NEST_LSI_SRC_ID:
+ pbcq->nest_regs[reg] = val & PBCQ_NEST_LSI_SRC;
+ pnv_phb3_remap_irqs(pbcq->phb);
+ break;
+ default:
+ phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+ addr, val);
+ }
+}
+
+static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PBCQ_PCI_BAR2:
+ pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull;
+ pnv_pbcq_update_map(pbcq);
+ break;
+ default:
+ phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+ addr, val);
+ }
+}
+
+static void pnv_pbcq_spci_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PBCQ_SPCI_ASB_ADDR:
+ pbcq->spci_regs[reg] = val & 0xfff;
+ break;
+ case PBCQ_SPCI_ASB_STATUS:
+ pbcq->spci_regs[reg] &= ~val;
+ break;
+ case PBCQ_SPCI_ASB_DATA:
+ pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR],
+ val, 8);
+ break;
+ case PBCQ_SPCI_AIB_CAPP_EN:
+ case PBCQ_SPCI_CAPP_SEC_TMR:
+ break;
+ default:
+ phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+ addr, val);
+ }
+}
+
+static const MemoryRegionOps pnv_pbcq_nest_xscom_ops = {
+ .read = pnv_pbcq_nest_xscom_read,
+ .write = pnv_pbcq_nest_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static const MemoryRegionOps pnv_pbcq_pci_xscom_ops = {
+ .read = pnv_pbcq_pci_xscom_read,
+ .write = pnv_pbcq_pci_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static const MemoryRegionOps pnv_pbcq_spci_xscom_ops = {
+ .read = pnv_pbcq_spci_xscom_read,
+ .write = pnv_pbcq_spci_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_pbcq_default_bars(PnvPBCQState *pbcq)
+{
+ uint64_t mm0, mm1, reg;
+ PnvPHB3 *phb = pbcq->phb;
+
+ mm0 = 0x3d00000000000ull + 0x4000000000ull * phb->chip_id +
+ 0x1000000000ull * phb->phb_id;
+ mm1 = 0x3ff8000000000ull + 0x0200000000ull * phb->chip_id +
+ 0x0080000000ull * phb->phb_id;
+ reg = 0x3fffe40000000ull + 0x0000400000ull * phb->chip_id +
+ 0x0000100000ull * phb->phb_id;
+
+ pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14;
+ pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14;
+ pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14;
+ pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14;
+ pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14;
+ pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14;
+}
+
+static void pnv_pbcq_realize(DeviceState *dev, Error **errp)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(dev);
+ PnvPHB3 *phb;
+ char name[32];
+
+ assert(pbcq->phb);
+ phb = pbcq->phb;
+
+ /* TODO: Fix OPAL to do that: establish default BAR values */
+ pnv_pbcq_default_bars(pbcq);
+
+ /* Initialize the XSCOM region for the PBCQ registers */
+ snprintf(name, sizeof(name), "xscom-pbcq-nest-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_nest_regs, OBJECT(dev),
+ &pnv_pbcq_nest_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_NEST_SIZE);
+ snprintf(name, sizeof(name), "xscom-pbcq-pci-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_pci_regs, OBJECT(dev),
+ &pnv_pbcq_pci_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_PCI_SIZE);
+ snprintf(name, sizeof(name), "xscom-pbcq-spci-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_spci_regs, OBJECT(dev),
+ &pnv_pbcq_spci_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_SPCI_SIZE);
+}
+
+static int pnv_pbcq_dt_xscom(PnvXScomInterface *dev, void *fdt,
+ int xscom_offset)
+{
+ const char compat[] = "ibm,power8-pbcq";
+ PnvPHB3 *phb = PNV_PBCQ(dev)->phb;
+ char *name;
+ int offset;
+ uint32_t lpc_pcba = PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id;
+ uint32_t reg[] = {
+ cpu_to_be32(lpc_pcba),
+ cpu_to_be32(PNV_XSCOM_PBCQ_NEST_SIZE),
+ cpu_to_be32(PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id),
+ cpu_to_be32(PNV_XSCOM_PBCQ_PCI_SIZE),
+ cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id),
+ cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_SIZE)
+ };
+
+ name = g_strdup_printf("pbcq@%x", lpc_pcba);
+ offset = fdt_add_subnode(fdt, xscom_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,phb-index", phb->phb_id)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", phb->chip_id)));
+ _FDT((fdt_setprop(fdt, offset, "compatible", compat,
+ sizeof(compat))));
+ return 0;
+}
+
+static void phb3_pbcq_instance_init(Object *obj)
+{
+ PnvPBCQState *pbcq = PNV_PBCQ(obj);
+
+ object_property_add_link(obj, "phb", TYPE_PNV_PHB3,
+ (Object **)&pbcq->phb,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_STRONG,
+ &error_abort);
+}
+
+static void pnv_pbcq_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+ xdc->dt_xscom = pnv_pbcq_dt_xscom;
+
+ dc->realize = pnv_pbcq_realize;
+ dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_pbcq_type_info = {
+ .name = TYPE_PNV_PBCQ,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvPBCQState),
+ .instance_init = phb3_pbcq_instance_init,
+ .class_init = pnv_pbcq_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { }
+ }
+};
+
+static void pnv_pbcq_register_types(void)
+{
+ type_register_static(&pnv_pbcq_type_info);
+}
+
+type_init(pnv_pbcq_register_types)
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
new file mode 100644
index 0000000..23cf093
--- /dev/null
+++ b/hw/pci-host/pnv_phb4.c
@@ -0,0 +1,1439 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "monitor/monitor.h"
+#include "target/ppc/cpu.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+#define phb_error(phb, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n", \
+ (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
+
+/*
+ * QEMU version of the GETFIELD/SETFIELD macros
+ *
+ * These are common with the PnvXive model.
+ */
+static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
+{
+ return (word & mask) >> ctz64(mask);
+}
+
+static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
+ uint64_t value)
+{
+ return (word & ~mask) | ((value << ctz64(mask)) & mask);
+}
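For example, with a contiguous mask the helpers extract and deposit a right-justified field value:

    uint64_t word = 0;
    word = SETFIELD(0x00ff000000000000ull, word, 0x42);
    /* word == 0x0042000000000000ull */
    /* GETFIELD(0x00ff000000000000ull, word) == 0x42 */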
+
+static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
+{
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+ uint8_t bus, devfn;
+
+ if (!(addr >> 63)) {
+ return NULL;
+ }
+ bus = (addr >> 52) & 0xff;
+ devfn = (addr >> 44) & 0xff;
+
+ /* We don't access the root complex this way */
+ if (bus == 0 && devfn == 0) {
+ return NULL;
+ }
+ return pci_find_device(pci->bus, bus, devfn);
+}
+
+/*
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, we have to swap the value.
+ */
+static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
+ unsigned size, uint64_t val)
+{
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+
+ pdev = pnv_phb4_find_cfg_dev(phb);
+ if (!pdev) {
+ return;
+ }
+ cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+ cfg_addr |= off;
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /*
+         * A conventional PCI device can sit behind a PCIe-to-PCI
+         * bridge; accesses in the 256 <= addr < 4K range have no effect.
+ */
+ return;
+ }
+ switch (size) {
+ case 1:
+ break;
+ case 2:
+ val = bswap16(val);
+ break;
+ case 4:
+ val = bswap32(val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
+ unsigned size)
+{
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+ uint64_t val;
+
+ pdev = pnv_phb4_find_cfg_dev(phb);
+ if (!pdev) {
+ return ~0ull;
+ }
+ cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+ cfg_addr |= off;
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /*
+         * A conventional PCI device can sit behind a PCIe-to-PCI
+         * bridge; accesses in the 256 <= addr < 4K range have no effect.
+ */
+ return ~0ull;
+ }
+ val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
+ switch (size) {
+ case 1:
+ return val;
+ case 2:
+ return bswap16(val);
+ case 4:
+ return bswap32(val);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * Root complex register accesses are memory mapped.
+ */
+static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
+ unsigned size, uint64_t val)
+{
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ PCIDevice *pdev;
+
+ if (size != 4) {
+ phb_error(phb, "rc_config_write invalid size %d\n", size);
+ return;
+ }
+
+ pdev = pci_find_device(pci->bus, 0, 0);
+ assert(pdev);
+
+ pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
+ bswap32(val), 4);
+}
+
+static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
+ unsigned size)
+{
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ PCIDevice *pdev;
+ uint64_t val;
+
+ if (size != 4) {
+ phb_error(phb, "rc_config_read invalid size %d\n", size);
+ return ~0ull;
+ }
+
+ pdev = pci_find_device(pci->bus, 0, 0);
+ assert(pdev);
+
+ val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
+ return bswap32(val);
+}
+
+static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
+{
+ uint64_t base, start, size, mbe0, mbe1;
+ MemoryRegion *parent;
+ char name[64];
+
+ /* Unmap first */
+ if (memory_region_is_mapped(&phb->mr_mmio[index])) {
+        /* Should we destroy it in an RCU-friendly way... ? */
+ memory_region_del_subregion(phb->mr_mmio[index].container,
+ &phb->mr_mmio[index]);
+ }
+
+ /* Get table entry */
+ mbe0 = phb->ioda_MBT[(index << 1)];
+ mbe1 = phb->ioda_MBT[(index << 1) + 1];
+
+ if (!(mbe0 & IODA3_MBT0_ENABLE)) {
+ return;
+ }
+
+ /* Grab geometry from registers */
+ base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
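+    /*
+     * The MASK field holds ~(window size - 1) >> 12; restoring the top
+     * byte and taking the two's complement below recovers the window
+     * size in bytes (a reading of the decoding done here).
+     */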
+ size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
+ size |= 0xff00000000000000ull;
+ size = ~size + 1;
+
+ /* Calculate PCI side start address based on M32/M64 window type */
+ if (mbe0 & IODA3_MBT0_TYPE_M32) {
+ start = phb->regs[PHB_M32_START_ADDR >> 3];
+ if ((start + size) > 0x100000000ull) {
+ phb_error(phb, "M32 set beyond 4GB boundary !");
+ size = 0x100000000 - start;
+ }
+ } else {
+ start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+ }
+
+    /* TODO: Figure out how to implement/decode AOMASK */
+
+ /* Check if it matches an enabled MMIO region in the PEC stack */
+ if (memory_region_is_mapped(&phb->stack->mmbar0) &&
+ base >= phb->stack->mmio0_base &&
+ (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
+ parent = &phb->stack->mmbar0;
+ base -= phb->stack->mmio0_base;
+ } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
+ base >= phb->stack->mmio1_base &&
+ (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
+ parent = &phb->stack->mmbar1;
+ base -= phb->stack->mmio1_base;
+ } else {
+ phb_error(phb, "PHB MBAR %d out of parent bounds", index);
+ return;
+ }
+
+ /* Create alias (better name ?) */
+ snprintf(name, sizeof(name), "phb4-mbar%d", index);
+ memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
+ &phb->pci_mmio, start, size);
+ memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
+}
+
+static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
+{
+ uint64_t i;
+ uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
+ PNV_PHB4_MIN_MMIO_WINDOWS;
+
+ for (i = 0; i < num_windows; i++) {
+ pnv_phb4_check_mbt(phb, i);
+ }
+}
+
+static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
+ unsigned *out_table, unsigned *out_idx)
+{
+ uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+ unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+ unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+ unsigned int mask;
+ uint64_t *tptr = NULL;
+
+ switch (table) {
+ case IODA3_TBL_LIST:
+ tptr = phb->ioda_LIST;
+ mask = 7;
+ break;
+ case IODA3_TBL_MIST:
+ tptr = phb->ioda_MIST;
+ mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_RCAM:
+ mask = phb->big_phb ? 127 : 63;
+ break;
+ case IODA3_TBL_MRT:
+ mask = phb->big_phb ? 15 : 7;
+ break;
+ case IODA3_TBL_PESTA:
+ case IODA3_TBL_PESTB:
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_TVT:
+ tptr = phb->ioda_TVT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_TCR:
+ case IODA3_TBL_TDR:
+ mask = phb->big_phb ? 1023 : 511;
+ break;
+ case IODA3_TBL_MBT:
+ tptr = phb->ioda_MBT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_MDT:
+ tptr = phb->ioda_MDT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_PEEV:
+ tptr = phb->ioda_PEEV;
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
+ mask -= 1;
+ break;
+ default:
+ phb_error(phb, "invalid IODA table %d", table);
+ return NULL;
+ }
+ index &= mask;
+ if (out_idx) {
+ *out_idx = index;
+ }
+ if (out_table) {
+ *out_table = table;
+ }
+ if (tptr) {
+ tptr += index;
+ }
+ if (adreg & PHB_IODA_AD_AUTOINC) {
+ index = (index + 1) & mask;
+ adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+ }
+
+ phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+ return tptr;
+}
+
+static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
+{
+ unsigned table, idx;
+ uint64_t *tptr;
+
+ tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+ if (!tptr) {
+ /* Special PESTA case */
+ if (table == IODA3_TBL_PESTA) {
+ return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
+ } else if (table == IODA3_TBL_PESTB) {
+ return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
+ }
+ /* Return 0 on unsupported tables, not ff's */
+ return 0;
+ }
+ return *tptr;
+}
+
+static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
+{
+ unsigned table, idx;
+ uint64_t *tptr;
+
+ tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+ if (!tptr) {
+ /* Special PESTA case */
+ if (table == IODA3_TBL_PESTA) {
+ phb->ioda_PEST_AB[idx] &= ~1;
+ phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
+ } else if (table == IODA3_TBL_PESTB) {
+ phb->ioda_PEST_AB[idx] &= ~2;
+ phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
+ }
+ return;
+ }
+
+ /* Handle side effects */
+ switch (table) {
+ case IODA3_TBL_LIST:
+ break;
+ case IODA3_TBL_MIST: {
+ /* Special mask for MIST partial write */
+ uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+ uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
+ uint64_t v = *tptr;
+ if (mmask == 0) {
+ mmask = 0xf;
+ }
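+        /*
+         * Each PWV bit selects one 16-bit quadrant of the entry; per
+         * the masks used below, only the 0xcfff bits within a quadrant
+         * appear to be writable.
+         */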
+ if (mmask & 8) {
+ v &= 0x0000ffffffffffffull;
+ v |= 0xcfff000000000000ull & val;
+ }
+ if (mmask & 4) {
+ v &= 0xffff0000ffffffffull;
+ v |= 0x0000cfff00000000ull & val;
+ }
+ if (mmask & 2) {
+ v &= 0xffffffff0000ffffull;
+ v |= 0x00000000cfff0000ull & val;
+ }
+ if (mmask & 1) {
+ v &= 0xffffffffffff0000ull;
+ v |= 0x000000000000cfffull & val;
+ }
+        *tptr = v;
+ break;
+ }
+ case IODA3_TBL_MBT:
+ *tptr = val;
+
+        /* Copy across the valid bit to the other half */
+ phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
+ phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
+
+ /* Update mappings */
+ pnv_phb4_check_mbt(phb, idx >> 1);
+ break;
+ default:
+ *tptr = val;
+ }
+}
+
+static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
+{
+ PnvPhb4DMASpace *ds;
+
+ /* Always invalidate all for now ... */
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ ds->pe_num = PHB_INVALID_PE;
+ }
+}
+
+static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
+{
+ uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+
+ if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
+ if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ 0xffff0000, &ds->msi32_mr);
+ }
+ } else {
+ if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi32_mr);
+ }
+ }
+
+ if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
+ if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ (1ull << 60), &ds->msi64_mr);
+ }
+ } else {
+ if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi64_mr);
+ }
+ }
+}
+
+static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
+{
+ PnvPhb4DMASpace *ds;
+
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ pnv_phb4_update_msi_regions(ds);
+ }
+}
+
+static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
+{
+ int shift, flags, i, lsi_base;
+ XiveSource *xsrc = &phb->xsrc;
+
+ /* The XIVE source characteristics can be set at run time */
+ if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
+ shift = XIVE_ESB_64K;
+ } else {
+ shift = XIVE_ESB_4K;
+ }
+ if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
+ flags = XIVE_SRC_STORE_EOI;
+ } else {
+ flags = 0;
+ }
+
+ phb->xsrc.esb_shift = shift;
+ phb->xsrc.esb_flags = flags;
+
+ lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+ lsi_base <<= 3;
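+    /* The PHB's 8 LSIs occupy sources [lsi_base, lsi_base + 8) */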
+
+ /* TODO: handle reset values of PHB_LSI_SRC_ID */
+ if (!lsi_base) {
+ return;
+ }
+
+ /* TODO: need a xive_source_irq_reset_lsi() */
+ bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ bool msi = (i < lsi_base || i >= (lsi_base + 8));
+ if (!msi) {
+ xive_source_irq_set_lsi(xsrc, i);
+ }
+ }
+}
+
+static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
+ unsigned size)
+{
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ bool changed;
+
+ /* Special case outbound configuration data */
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+ pnv_phb4_config_write(phb, off & 0x3, size, val);
+ return;
+ }
+
+ /* Special case RC configuration space */
+ if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+ pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
+ return;
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return;
+ }
+
+ /* Handle masking */
+ switch (off) {
+ case PHB_LSI_SOURCE_ID:
+ val &= PHB_LSI_SRC_ID;
+ break;
+ case PHB_M64_UPPER_BITS:
+ val &= 0xff00000000000000ull;
+ break;
+ /* TCE Kill */
+ case PHB_TCE_KILL:
+        /* Clear the top 3 bits, as HW does, to indicate successful queuing */
+ val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
+ break;
+ case PHB_Q_DMA_R:
+ /*
+ * This is enough logic to make SW happy but we aren't
+ * actually quiescing the DMAs
+ */
+ if (val & PHB_Q_DMA_R_AUTORESET) {
+ val = 0;
+ } else {
+ val &= PHB_Q_DMA_R_QUIESCE_DMA;
+ }
+ break;
+ /* LEM stuff */
+ case PHB_LEM_FIR_AND_MASK:
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+ return;
+ case PHB_LEM_FIR_OR_MASK:
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+ return;
+ case PHB_LEM_ERROR_AND_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+ return;
+ case PHB_LEM_ERROR_OR_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+ return;
+ case PHB_LEM_WOF:
+ val = 0;
+ break;
+ /* TODO: More regs ..., maybe create a table with masks... */
+
+ /* Read only registers */
+ case PHB_CPU_LOADSTORE_STATUS:
+ case PHB_ETU_ERR_SUMMARY:
+ case PHB_PHB4_GEN_CAP:
+ case PHB_PHB4_TCE_CAP:
+ case PHB_PHB4_IRQ_CAP:
+ case PHB_PHB4_EEH_CAP:
+ return;
+ }
+
+ /* Record whether it changed */
+ changed = phb->regs[off >> 3] != val;
+
+ /* Store in register cache first */
+ phb->regs[off >> 3] = val;
+
+ /* Handle side effects */
+ switch (off) {
+ case PHB_PHB4_CONFIG:
+ if (changed) {
+ pnv_phb4_update_all_msi_regions(phb);
+ }
+ break;
+ case PHB_M32_START_ADDR:
+ case PHB_M64_UPPER_BITS:
+ if (changed) {
+ pnv_phb4_check_all_mbt(phb);
+ }
+ break;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ pnv_phb4_ioda_write(phb, val);
+ break;
+
+ /* RTC invalidation */
+ case PHB_RTC_INVALIDATE:
+ pnv_phb4_rtc_invalidate(phb, val);
+ break;
+
+ /* PHB Control (Affects XIVE source) */
+ case PHB_CTRLR:
+ case PHB_LSI_SOURCE_ID:
+ pnv_phb4_update_xsrc(phb);
+ break;
+
+ /* Silent simple writes */
+ case PHB_ASN_CMPM:
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ case PHB_TCE_TAG_ENABLE:
+ case PHB_INT_NOTIFY_ADDR:
+ case PHB_INT_NOTIFY_INDEX:
+ case PHB_DMARD_SYNC:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+ off, val);
+ }
+}
+
+static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint64_t val;
+
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+ return pnv_phb4_config_read(phb, off & 0x3, size);
+ }
+
+ /* Special case RC configuration space */
+ if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+ return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return ~0ull;
+ }
+
+ /* Default read from cache */
+ val = phb->regs[off >> 3];
+
+ switch (off) {
+ case PHB_VERSION:
+ return phb->version;
+
+ /* Read-only */
+ case PHB_PHB4_GEN_CAP:
+ return 0xe4b8000000000000ull;
+ case PHB_PHB4_TCE_CAP:
+ return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
+ case PHB_PHB4_IRQ_CAP:
+ return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
+ case PHB_PHB4_EEH_CAP:
+ return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ return pnv_phb4_ioda_read(phb);
+
+ /* Link training always appears trained */
+ case PHB_PCIE_DLP_TRAIN_CTL:
+ /* TODO: Do something sensible with speed ? */
+ return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+
+ /* DMA read sync: make it look like it's complete */
+ case PHB_DMARD_SYNC:
+ return PHB_DMARD_SYNC_COMPLETE;
+
+ /* Silent simple reads */
+ case PHB_LSI_SOURCE_ID:
+ case PHB_CPU_LOADSTORE_STATUS:
+ case PHB_ASN_CMPM:
+ case PHB_PHB4_CONFIG:
+ case PHB_M32_START_ADDR:
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_RTC_INVALIDATE:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_M64_UPPER_BITS:
+ case PHB_CTRLR:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ case PHB_TCE_TAG_ENABLE:
+ case PHB_INT_NOTIFY_ADDR:
+ case PHB_INT_NOTIFY_INDEX:
+ case PHB_Q_DMA_R:
+ case PHB_ETU_ERR_SUMMARY:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+ off, val);
+ }
+ return val;
+}
+
+static const MemoryRegionOps pnv_phb4_reg_ops = {
+ .read = pnv_phb4_reg_read,
+ .write = pnv_phb4_reg_write,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
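+/*
+ * Indirect XSCOM access to the PHB register space: software writes the
+ * target offset (with the VALID, 4B and AUTOINC flags) to HV_IND_ADDR,
+ * then accesses HV_IND_DATA, which is forwarded to the PHB register
+ * handlers, optionally auto-incrementing the offset.
+ */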
+static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t reg = addr >> 3;
+ uint64_t val;
+ hwaddr offset;
+
+ switch (reg) {
+ case PHB_SCOM_HV_IND_ADDR:
+ return phb->scom_hv_ind_addr_reg;
+
+ case PHB_SCOM_HV_IND_DATA:
+ if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+ phb_error(phb, "Invalid indirect address");
+ return ~0ull;
+ }
+ size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
+ offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+ val = pnv_phb4_reg_read(phb, offset, size);
+ if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+ offset += size;
+ offset &= 0x3fff;
+ phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+ phb->scom_hv_ind_addr_reg,
+ offset);
+ }
+ return val;
+ case PHB_SCOM_ETU_LEM_FIR:
+ case PHB_SCOM_ETU_LEM_FIR_AND:
+ case PHB_SCOM_ETU_LEM_FIR_OR:
+ case PHB_SCOM_ETU_LEM_FIR_MSK:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+ case PHB_SCOM_ETU_LEM_ACT0:
+ case PHB_SCOM_ETU_LEM_ACT1:
+ case PHB_SCOM_ETU_LEM_WOF:
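+        /* These SCOM registers alias the PHB's LEM register block 1:1 */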
+ offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
+ return pnv_phb4_reg_read(phb, offset, size);
+ case PHB_SCOM_ETU_PMON_CONFIG:
+ case PHB_SCOM_ETU_PMON_CTR0:
+ case PHB_SCOM_ETU_PMON_CTR1:
+ case PHB_SCOM_ETU_PMON_CTR2:
+ case PHB_SCOM_ETU_PMON_CTR3:
+ offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
+ return pnv_phb4_reg_read(phb, offset, size);
+
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
+ return ~0ull;
+ }
+}
+
+static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t reg = addr >> 3;
+ hwaddr offset;
+
+ switch (reg) {
+ case PHB_SCOM_HV_IND_ADDR:
+ phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
+ break;
+ case PHB_SCOM_HV_IND_DATA:
+ if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+ phb_error(phb, "Invalid indirect address");
+ break;
+ }
+ size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
+ offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+ pnv_phb4_reg_write(phb, offset, val, size);
+ if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+ offset += size;
+ offset &= 0x3fff;
+ phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+ phb->scom_hv_ind_addr_reg,
+ offset);
+ }
+ break;
+ case PHB_SCOM_ETU_LEM_FIR:
+ case PHB_SCOM_ETU_LEM_FIR_AND:
+ case PHB_SCOM_ETU_LEM_FIR_OR:
+ case PHB_SCOM_ETU_LEM_FIR_MSK:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+ case PHB_SCOM_ETU_LEM_ACT0:
+ case PHB_SCOM_ETU_LEM_ACT1:
+ case PHB_SCOM_ETU_LEM_WOF:
+ offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
+ pnv_phb4_reg_write(phb, offset, val, size);
+ break;
+ case PHB_SCOM_ETU_PMON_CONFIG:
+ case PHB_SCOM_ETU_PMON_CTR0:
+ case PHB_SCOM_ETU_PMON_CTR1:
+ case PHB_SCOM_ETU_PMON_CTR2:
+ case PHB_SCOM_ETU_PMON_CTR3:
+ offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
+ pnv_phb4_reg_write(phb, offset, val, size);
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
+ "=%"PRIx64"\n", addr, val);
+ }
+}
+
+const MemoryRegionOps pnv_phb4_xscom_ops = {
+ .read = pnv_phb4_xscom_read,
+ .write = pnv_phb4_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* TODO: check this IRQ mapping properly ... */
+ return irq_num & 3;
+}
+
+static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
+{
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t lsi_base;
+
+ /* LSI only ... */
+    if (irq_num > 3) {
+        phb_error(phb, "IRQ %x is not an LSI", irq_num);
+        return;
+    }
+ lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+ lsi_base <<= 3;
+ qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
+}
+
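+/*
+ * The RTT maps each RID ((bus << 8) | devfn) to a PE number: 16-bit
+ * big-endian entries indexed by RID.
+ */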
+static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
+{
+ uint64_t rtt, addr;
+ uint16_t rte;
+ int bus_num;
+ int num_PEs;
+
+ /* Already resolved ? */
+ if (ds->pe_num != PHB_INVALID_PE) {
+ return true;
+ }
+
+ /* We need to lookup the RTT */
+ rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+ if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+ phb_error(ds->phb, "DMA with RTT BAR disabled !");
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+
+ /* Read RTE */
+ bus_num = pci_bus_num(ds->bus);
+ addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+ addr += 2 * ((bus_num << 8) | ds->devfn);
+ if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+ phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+ rte = be16_to_cpu(rte);
+
+ /* Fail upon reading of invalid PE# */
+ num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ if (rte >= num_PEs) {
+ phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
+ rte &= num_PEs - 1;
+ }
+ ds->pe_num = rte;
+ return true;
+}
+
+static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
+ bool is_write, uint64_t tve,
+ IOMMUTLBEntry *tlb)
+{
+ uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
+ int32_t lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
+ uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
+ uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
+
+ /* Invalid levels */
+ if (lev > 4) {
+ phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
+ return;
+ }
+
+ /* Invalid entry */
+ if (tts == 0) {
+ phb_error(ds->phb, "Access to invalid TVE");
+ return;
+ }
+
+ /* IO Page Size of 0 means untranslated, else use TCEs */
+ if (tps == 0) {
+ /* TODO: Handle boundaries */
+
+ /* Use 4k pages like q35 ... for now */
+ tlb->iova = addr & 0xfffffffffffff000ull;
+ tlb->translated_addr = addr & 0x0003fffffffff000ull;
+ tlb->addr_mask = 0xfffull;
+ tlb->perm = IOMMU_RW;
+ } else {
+ uint32_t tce_shift, tbl_shift, sh;
+ uint64_t base, taddr, tce, tce_mask;
+
+ /* Address bits per bottom level TCE entry */
+ tce_shift = tps + 11;
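+        /* e.g. tps == 1 gives tce_shift == 12, i.e. 4K IO pages */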
+
+ /* Address bits per table level */
+ tbl_shift = tts + 8;
+
+ /* Top level table base address */
+ base = tta << 12;
+
+ /* Total shift to first level */
+ sh = tbl_shift * lev + tce_shift;
+
+ /* TODO: Limit to support IO page sizes */
+
+ /* TODO: Multi-level untested */
+ while ((lev--) >= 0) {
+ /* Grab the TCE address */
+ taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
+ if (dma_memory_read(&address_space_memory, taddr, &tce,
+ sizeof(tce))) {
+ phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+ return;
+ }
+ tce = be64_to_cpu(tce);
+
+ /* Check permission for indirect TCE */
+ if ((lev >= 0) && !(tce & 3)) {
+ phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+ phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ return;
+ }
+ sh -= tbl_shift;
+ base = tce & ~0xfffull;
+ }
+
+ /* We exit the loop with TCE being the final TCE */
+ tce_mask = ~((1ull << tce_shift) - 1);
+ tlb->iova = addr & tce_mask;
+ tlb->translated_addr = tce & tce_mask;
+ tlb->addr_mask = ~tce_mask;
+ tlb->perm = tce & 3;
+        if ((is_write && !(tce & 2)) || (!is_write && !(tce & 1))) {
+ phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
+ phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ }
+ }
+}
+
+static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
+ hwaddr addr,
+ IOMMUAccessFlags flag,
+ int iommu_idx)
+{
+ PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
+ int tve_sel;
+ uint64_t tve, cfg;
+ IOMMUTLBEntry ret = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = ~(hwaddr)0,
+ .perm = IOMMU_NONE,
+ };
+
+ /* Resolve PE# */
+ if (!pnv_phb4_resolve_pe(ds)) {
+ phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return ret;
+ }
+
+ /* Check top bits */
+ switch (addr >> 60) {
+    case 0:
+ /* DMA or 32-bit MSI ? */
+ cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+ if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
+ ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+ phb_error(ds->phb, "xlate on 32-bit MSI region");
+ return ret;
+ }
+ /* Choose TVE XXX Use PHB4 Control Register */
+ tve_sel = (addr >> 59) & 1;
+ tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+ pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
+ break;
+    case 1:
+ phb_error(ds->phb, "xlate on 64-bit MSI region");
+ break;
+ default:
+ phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
+ }
+ return ret;
+}
+
+#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
+#define PNV_PHB4_IOMMU_MEMORY_REGION(obj) \
+ OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
+
+static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
+ void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = pnv_phb4_translate_iommu;
+}
+
+static const TypeInfo pnv_phb4_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+ .class_init = pnv_phb4_iommu_memory_region_class_init,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ */
+static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ PnvPhb4DMASpace *ds = opaque;
+ PnvPHB4 *phb = ds->phb;
+
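+    /*
+     * The source number is encoded in MSI address bits 4-19, OR-ed
+     * with the low 5 bits of the MSI data.
+     */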
+ uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
+
+ /* Resolve PE# */
+ if (!pnv_phb4_resolve_pe(ds)) {
+ phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return;
+ }
+
+ /* TODO: Check it doesn't collide with LSIs */
+ if (src >= phb->xsrc.nr_irqs) {
+ phb_error(phb, "MSI %d out of bounds", src);
+ return;
+ }
+
+    /* TODO: check PE/MSI assignment */
+
+ qemu_irq_pulse(phb->qirqs[src]);
+}
+
+/*
+ * The read result is undefined by the PCI spec; log the access and
+ * return all ones.
+ */
+static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+ PnvPhb4DMASpace *ds = opaque;
+
+ phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
+ return -1;
+}
+
+static const MemoryRegionOps pnv_phb4_msi_ops = {
+ .read = pnv_phb4_msi_read,
+ .write = pnv_phb4_msi_write,
+ .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
+{
+ PnvPhb4DMASpace *ds;
+
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ if (ds->bus == bus && ds->devfn == devfn) {
+ break;
+ }
+ }
+ return ds;
+}
+
+static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+ PnvPHB4 *phb = opaque;
+ PnvPhb4DMASpace *ds;
+ char name[32];
+
+ ds = pnv_phb4_dma_find(phb, bus, devfn);
+
+ if (ds == NULL) {
+ ds = g_malloc0(sizeof(PnvPhb4DMASpace));
+ ds->bus = bus;
+ ds->devfn = devfn;
+ ds->pe_num = PHB_INVALID_PE;
+ ds->phb = phb;
+ snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
+ phb->phb_id);
+ memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+ TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+ OBJECT(phb), name, UINT64_MAX);
+ address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
+ name);
+ memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+ ds, "msi32", 0x10000);
+ memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+ ds, "msi64", 0x100000);
+ pnv_phb4_update_msi_regions(ds);
+
+ QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+ }
+ return &ds->dma_as;
+}
+
+static void pnv_phb4_instance_init(Object *obj)
+{
+ PnvPHB4 *phb = PNV_PHB4(obj);
+
+ QLIST_INIT(&phb->dma_spaces);
+
+ /* XIVE interrupt source object */
+ object_initialize_child(obj, "source", &phb->xsrc, sizeof(XiveSource),
+ TYPE_XIVE_SOURCE, &error_abort, NULL);
+
+ /* Root Port */
+ object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+ TYPE_PNV_PHB4_ROOT_PORT, &error_abort, NULL);
+
+ qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
+ qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
+}
+
+static void pnv_phb4_realize(DeviceState *dev, Error **errp)
+{
+ PnvPHB4 *phb = PNV_PHB4(dev);
+ PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+ XiveSource *xsrc = &phb->xsrc;
+ Error *local_err = NULL;
+ int nr_irqs;
+ char name[32];
+
+ assert(phb->stack);
+
+ /* Set the "big_phb" flag */
+ phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
+
+ /* Controller Registers */
+ snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
+ phb->phb_id);
+ memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
+ name, 0x2000);
+
+ /*
+     * PHB4 doesn't support IO space. However, QEMU gets very upset if
+     * we don't have an IO region to anchor IO BARs onto, so we just
+     * initialize one that we never hook up to anything.
+ */
+
+ snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
+ phb->phb_id);
+ memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
+
+ snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
+ phb->phb_id);
+ memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
+ PCI_MMIO_TOTAL_SIZE);
+
+ pci->bus = pci_register_root_bus(dev, "root-bus",
+ pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
+ &phb->pci_mmio, &phb->pci_io,
+ 0, 4, TYPE_PNV_PHB4_ROOT_BUS);
+ pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
+
+ /* Add a single Root port */
+ qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
+ qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
+ qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
+ qdev_init_nofail(DEVICE(&phb->root));
+
+ /* Setup XIVE Source */
+ if (phb->big_phb) {
+ nr_irqs = PNV_PHB4_MAX_INTs;
+ } else {
+ nr_irqs = PNV_PHB4_MAX_INTs >> 1;
+ }
+ object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal);
+ object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal);
+ object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pnv_phb4_update_xsrc(phb);
+
+ phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
+}
+
+static void pnv_phb4_reset(DeviceState *dev)
+{
+ PnvPHB4 *phb = PNV_PHB4(dev);
+ PCIDevice *root_dev = PCI_DEVICE(&phb->root);
+
+ /*
+ * Configure PCI device id at reset using a property.
+ */
+ pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM);
+ pci_config_set_device_id(root_dev->config, phb->device_id);
+}
+
+static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
+ PCIBus *rootbus)
+{
+ PnvPHB4 *phb = PNV_PHB4(host_bridge);
+
+ snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
+ phb->chip_id, phb->phb_id);
+ return phb->bus_path;
+}
+
+static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
+{
+ PnvPHB4 *phb = PNV_PHB4(xf);
+ uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
+ uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+ uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
+ MemTxResult result;
+
+ address_space_stq_be(&address_space_memory, notif_port, data,
+ MEMTXATTRS_UNSPECIFIED, &result);
+ if (result != MEMTX_OK) {
+ phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
+ return;
+ }
+}
+
+static Property pnv_phb4_properties[] = {
+ DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
+ DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
+ DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
+ DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0),
+ DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
+ PnvPhb4PecStack *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_phb4_class_init(ObjectClass *klass, void *data)
+{
+ PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
+
+ hc->root_bus_path = pnv_phb4_root_bus_path;
+ dc->realize = pnv_phb4_realize;
+ device_class_set_props(dc, pnv_phb4_properties);
+ set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+ dc->user_creatable = false;
+ dc->reset = pnv_phb4_reset;
+
+ xfc->notify = pnv_phb4_xive_notify;
+}
+
+static const TypeInfo pnv_phb4_type_info = {
+ .name = TYPE_PNV_PHB4,
+ .parent = TYPE_PCIE_HOST_BRIDGE,
+ .instance_init = pnv_phb4_instance_init,
+ .instance_size = sizeof(PnvPHB4),
+ .class_init = pnv_phb4_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_NOTIFIER },
+ { },
+ }
+};
+
+static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
+{
+ BusClass *k = BUS_CLASS(klass);
+
+ /*
+ * PHB4 has only a single root complex. Enforce the limit on the
+ * parent bus
+ */
+ k->max_dev = 1;
+}
+
+static const TypeInfo pnv_phb4_root_bus_info = {
+ .name = TYPE_PNV_PHB4_ROOT_BUS,
+ .parent = TYPE_PCIE_BUS,
+ .class_init = pnv_phb4_root_bus_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { }
+ },
+};
+
+static void pnv_phb4_root_port_reset(DeviceState *dev)
+{
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+ PCIDevice *d = PCI_DEVICE(dev);
+ uint8_t *conf = d->config;
+
+ rpc->parent_reset(dev);
+
+ pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
+ PCI_IO_RANGE_MASK & 0xff);
+ pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
+ PCI_IO_RANGE_MASK & 0xff);
+ pci_set_word(conf + PCI_MEMORY_BASE, 0);
+ pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
+ pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
+ pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
+ pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
+ pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
+}
+
+static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
+{
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+ Error *local_err = NULL;
+
+ rpc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
+static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+ dc->desc = "IBM PHB4 PCIE Root Port";
+ dc->user_creatable = false;
+
+ device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
+ &rpc->parent_realize);
+ device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
+ &rpc->parent_reset);
+
+ k->vendor_id = PCI_VENDOR_ID_IBM;
+ k->device_id = PNV_PHB4_DEVICE_ID;
+ k->revision = 0;
+
+ rpc->exp_offset = 0x48;
+ rpc->aer_offset = 0x100;
+}
+
+static const TypeInfo pnv_phb4_root_port_info = {
+ .name = TYPE_PNV_PHB4_ROOT_PORT,
+ .parent = TYPE_PCIE_ROOT_PORT,
+ .instance_size = sizeof(PnvPHB4RootPort),
+ .class_init = pnv_phb4_root_port_class_init,
+};
+
+static void pnv_phb4_register_types(void)
+{
+ type_register_static(&pnv_phb4_root_bus_info);
+ type_register_static(&pnv_phb4_root_port_info);
+ type_register_static(&pnv_phb4_type_info);
+ type_register_static(&pnv_phb4_iommu_memory_region_info);
+}
+
+type_init(pnv_phb4_register_types);
+
+void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
+{
+ PnvPHB4 *phb = &stack->phb;
+
+ /* Unmap first always */
+ if (memory_region_is_mapped(&phb->mr_regs)) {
+ memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
+ }
+ if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
+ memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
+ }
+
+ /* Map registers if enabled */
+ if (memory_region_is_mapped(&stack->phbbar)) {
+ memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
+ }
+
+ /* Map ESB if enabled */
+ if (memory_region_is_mapped(&stack->intbar)) {
+ memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio);
+ }
+
+ /* Check/update m32 */
+ pnv_phb4_check_all_mbt(phb);
+}
+
+void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
+{
+ uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+
+ monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
+ phb->chip_id, phb->phb_id,
+ offset, offset + phb->xsrc.nr_irqs - 1);
+ xive_source_pic_print_info(&phb->xsrc, 0, mon);
+}
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
new file mode 100644
index 0000000..68e1db3
--- /dev/null
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -0,0 +1,595 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-properties.h"
+
+#include <libfdt.h>
+
+#define phb_pec_error(pec, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n", \
+ (pec)->chip_id, (pec)->index, ## __VA_ARGS__)
+
+
+static uint64_t pnv_pec_nest_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+ uint32_t reg = addr >> 3;
+
+    /* TODO: add a list of allowed registers and error out on others */
+ return pec->nest_regs[reg];
+}
+
+static void pnv_pec_nest_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PEC_NEST_PBCQ_HW_CONFIG:
+ case PEC_NEST_DROP_PRIO_CTRL:
+ case PEC_NEST_PBCQ_ERR_INJECT:
+ case PEC_NEST_PCI_NEST_CLK_TRACE_CTL:
+ case PEC_NEST_PBCQ_PMON_CTRL:
+ case PEC_NEST_PBCQ_PBUS_ADDR_EXT:
+ case PEC_NEST_PBCQ_PRED_VEC_TIMEOUT:
+ case PEC_NEST_CAPP_CTRL:
+ case PEC_NEST_PBCQ_READ_STK_OVR:
+ case PEC_NEST_PBCQ_WRITE_STK_OVR:
+ case PEC_NEST_PBCQ_STORE_STK_OVR:
+ case PEC_NEST_PBCQ_RETRY_BKOFF_CTRL:
+ pec->nest_regs[reg] = val;
+ break;
+ default:
+ phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__,
+ addr, val);
+ }
+}
+
+static const MemoryRegionOps pnv_pec_nest_xscom_ops = {
+ .read = pnv_pec_nest_xscom_read,
+ .write = pnv_pec_nest_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_pci_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+ uint32_t reg = addr >> 3;
+
+    /* TODO: add a list of allowed registers and error out on others */
+ return pec->pci_regs[reg];
+}
+
+static void pnv_pec_pci_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PEC_PCI_PBAIB_HW_CONFIG:
+ case PEC_PCI_PBAIB_READ_STK_OVR:
+ pec->pci_regs[reg] = val;
+ break;
+ default:
+ phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__,
+ addr, val);
+ }
+}
+
+static const MemoryRegionOps pnv_pec_pci_xscom_ops = {
+ .read = pnv_pec_pci_xscom_read,
+ .write = pnv_pec_pci_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+ uint32_t reg = addr >> 3;
+
+    /* TODO: add a list of allowed registers and error out on others */
+ return stack->nest_regs[reg];
+}
+
+static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
+{
+ PnvPhb4PecState *pec = stack->pec;
+ MemoryRegion *sysmem = pec->system_memory;
+ uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN];
+ uint64_t bar, mask, size;
+ char name[64];
+
+ /*
+     * NOTE: This will not work well if the BARs are remapped after
+     * the PHB has created its sub-regions. We could do better if we
+     * had a way to resize regions, but in practice this hardly
+     * matters as the mapping below only happens once, early during
+     * boot.
+ */
+
+ /* Handle unmaps */
+ if (memory_region_is_mapped(&stack->mmbar0) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
+ memory_region_del_subregion(sysmem, &stack->mmbar0);
+ }
+ if (memory_region_is_mapped(&stack->mmbar1) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+ memory_region_del_subregion(sysmem, &stack->mmbar1);
+ }
+ if (memory_region_is_mapped(&stack->phbbar) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+ memory_region_del_subregion(sysmem, &stack->phbbar);
+ }
+ if (memory_region_is_mapped(&stack->intbar) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+ memory_region_del_subregion(sysmem, &stack->intbar);
+ }
+
+ /* Update PHB */
+ pnv_phb4_update_regions(stack);
+
+ /* Handle maps */
+ if (!memory_region_is_mapped(&stack->mmbar0) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
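+        /*
+         * The BAR register holds the window address shifted left by 8;
+         * the MASK register holds ~((size - 1) << 8).
+         */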
+ bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
+ mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
+ size = ((~mask) >> 8) + 1;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->mmbar0, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->mmbar0);
+ stack->mmio0_base = bar;
+ stack->mmio0_size = size;
+ }
+ if (!memory_region_is_mapped(&stack->mmbar1) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+ bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
+ mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
+ size = ((~mask) >> 8) + 1;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->mmbar1, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->mmbar1);
+ stack->mmio1_base = bar;
+ stack->mmio1_size = size;
+ }
+ if (!memory_region_is_mapped(&stack->phbbar) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+ bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
+ size = PNV_PHB4_NUM_REGS << 3;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->phbbar, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->phbbar);
+ }
+ if (!memory_region_is_mapped(&stack->intbar) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+ bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
+ size = PNV_PHB4_MAX_INTs << 16;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int",
+ stack->pec->chip_id, stack->pec->index, stack->stack_no);
+ memory_region_init(&stack->intbar, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->intbar);
+ }
+
+ /* Update PHB */
+ pnv_phb4_update_regions(stack);
+}
+
+static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+ PnvPhb4PecState *pec = stack->pec;
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+ case PEC_NEST_STK_PCI_NEST_FIR:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_CLR:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_SET:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_MSK:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_MSKC:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_MSKS:
+ stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_ACT0:
+ case PEC_NEST_STK_PCI_NEST_FIR_ACT1:
+ stack->nest_regs[reg] = val;
+ break;
+ case PEC_NEST_STK_PCI_NEST_FIR_WOF:
+ stack->nest_regs[reg] = 0;
+ break;
+ case PEC_NEST_STK_ERR_REPORT_0:
+ case PEC_NEST_STK_ERR_REPORT_1:
+ case PEC_NEST_STK_PBCQ_GNRL_STATUS:
+ /* Flag error ? */
+ break;
+ case PEC_NEST_STK_PBCQ_MODE:
+ stack->nest_regs[reg] = val & 0xff00000000000000ull;
+ break;
+ case PEC_NEST_STK_MMIO_BAR0:
+ case PEC_NEST_STK_MMIO_BAR0_MASK:
+ case PEC_NEST_STK_MMIO_BAR1:
+ case PEC_NEST_STK_MMIO_BAR1_MASK:
+ if (stack->nest_regs[PEC_NEST_STK_BAR_EN] &
+ (PEC_NEST_STK_BAR_EN_MMIO0 |
+ PEC_NEST_STK_BAR_EN_MMIO1)) {
+ phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ }
+ stack->nest_regs[reg] = val & 0xffffffffff000000ull;
+ break;
+ case PEC_NEST_STK_PHB_REGS_BAR:
+ if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
+ phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ }
+ stack->nest_regs[reg] = val & 0xffffffffffc00000ull;
+ break;
+ case PEC_NEST_STK_INT_BAR:
+ if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
+ phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ }
+ stack->nest_regs[reg] = val & 0xfffffff000000000ull;
+ break;
+ case PEC_NEST_STK_BAR_EN:
+ stack->nest_regs[reg] = val & 0xf000000000000000ull;
+ pnv_pec_stk_update_map(stack);
+ break;
+ case PEC_NEST_STK_DATA_FRZ_TYPE:
+ case PEC_NEST_STK_PBCQ_TUN_BAR:
+ /* Not used for now */
+ stack->nest_regs[reg] = val;
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx
+ "=%"PRIx64"\n", addr, val);
+ }
+}
+
+static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = {
+ .read = pnv_pec_stk_nest_xscom_read,
+ .write = pnv_pec_stk_nest_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+ uint32_t reg = addr >> 3;
+
+    /* TODO: add a list of allowed registers and error out on others */
+ return stack->pci_regs[reg];
+}
+
+static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+ uint32_t reg = addr >> 3;
+
+ switch (reg) {
+    case PEC_PCI_STK_PCI_FIR:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_CLR:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_SET:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSK:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKC:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKS:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_ACT0:
+    case PEC_PCI_STK_PCI_FIR_ACT1:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_WOF:
+        stack->pci_regs[reg] = 0;
+        break;
+    case PEC_PCI_STK_ETU_RESET:
+        stack->pci_regs[reg] = val & 0x8000000000000000ull;
+        /* TODO: Implement reset */
+        break;
+    case PEC_PCI_STK_PBAIB_ERR_REPORT:
+        break;
+    case PEC_PCI_STK_PBAIB_TX_CMD_CRED:
+    case PEC_PCI_STK_PBAIB_TX_DAT_CRED:
+        stack->pci_regs[reg] = val;
+        break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx
+ "=%"PRIx64"\n", addr, val);
+ }
+}
+
+static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = {
+ .read = pnv_pec_stk_pci_xscom_read,
+ .write = pnv_pec_stk_pci_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_pec_instance_init(Object *obj)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(obj);
+ int i;
+
+ for (i = 0; i < PHB4_PEC_MAX_STACKS; i++) {
+ object_initialize_child(obj, "stack[*]", &pec->stacks[i],
+ sizeof(pec->stacks[i]), TYPE_PNV_PHB4_PEC_STACK,
+ &error_abort, NULL);
+ }
+}
+
+static void pnv_pec_realize(DeviceState *dev, Error **errp)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+ Error *local_err = NULL;
+ char name[64];
+ int i;
+
+ assert(pec->system_memory);
+
+ /* Create stacks */
+ for (i = 0; i < pec->num_stacks; i++) {
+ PnvPhb4PecStack *stack = &pec->stacks[i];
+ Object *stk_obj = OBJECT(stack);
+
+ object_property_set_int(stk_obj, i, "stack-no", &error_abort);
+ object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort);
+ object_property_set_bool(stk_obj, true, "realized", errp);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+
+ /* Initialize the XSCOM regions for the PEC registers */
+ snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest", pec->chip_id,
+ pec->index);
+ pnv_xscom_region_init(&pec->nest_regs_mr, OBJECT(dev),
+ &pnv_pec_nest_xscom_ops, pec, name,
+ PHB4_PEC_NEST_REGS_COUNT);
+
+ snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci", pec->chip_id,
+ pec->index);
+ pnv_xscom_region_init(&pec->pci_regs_mr, OBJECT(dev),
+ &pnv_pec_pci_xscom_ops, pec, name,
+ PHB4_PEC_PCI_REGS_COUNT);
+}
+
+static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt,
+ int xscom_offset)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(dev);
+ uint32_t nbase = pecc->xscom_nest_base(pec);
+ uint32_t pbase = pecc->xscom_pci_base(pec);
+ int offset, i;
+ char *name;
+ uint32_t reg[] = {
+ cpu_to_be32(nbase),
+ cpu_to_be32(pecc->xscom_nest_size),
+ cpu_to_be32(pbase),
+ cpu_to_be32(pecc->xscom_pci_size),
+ };
+
+ name = g_strdup_printf("pbcq@%x", nbase);
+ offset = fdt_add_subnode(fdt, xscom_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,pec-index", pec->index)));
+ _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 1)));
+ _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0)));
+ _FDT((fdt_setprop(fdt, offset, "compatible", pecc->compat,
+ pecc->compat_size)));
+
+ for (i = 0; i < pec->num_stacks; i++) {
+ PnvPhb4PecStack *stack = &pec->stacks[i];
+ PnvPHB4 *phb = &stack->phb;
+ int stk_offset;
+
+ name = g_strdup_printf("stack@%x", i);
+ stk_offset = fdt_add_subnode(fdt, offset, name);
+ _FDT(stk_offset);
+ g_free(name);
+ _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat,
+ pecc->stk_compat_size)));
+ _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i)));
+ _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id)));
+ }
+
+ return 0;
+}
+
+static Property pnv_pec_properties[] = {
+ DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0),
+ DEFINE_PROP_UINT32("num-stacks", PnvPhb4PecState, num_stacks, 0),
+ DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0),
+ DEFINE_PROP_LINK("system-memory", PnvPhb4PecState, system_memory,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
+{
+ return PNV9_XSCOM_PEC_PCI_BASE + 0x1000000 * pec->index;
+}
+
+static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec)
+{
+ return PNV9_XSCOM_PEC_NEST_BASE + 0x400 * pec->index;
+}
+
+static void pnv_pec_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_CLASS(klass);
+ static const char compat[] = "ibm,power9-pbcq";
+ static const char stk_compat[] = "ibm,power9-phb-stack";
+
+ xdc->dt_xscom = pnv_pec_dt_xscom;
+
+ dc->realize = pnv_pec_realize;
+ device_class_set_props(dc, pnv_pec_properties);
+ dc->user_creatable = false;
+
+ pecc->xscom_nest_base = pnv_pec_xscom_nest_base;
+ pecc->xscom_pci_base = pnv_pec_xscom_pci_base;
+ pecc->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE;
+ pecc->xscom_pci_size = PNV9_XSCOM_PEC_PCI_SIZE;
+ pecc->compat = compat;
+ pecc->compat_size = sizeof(compat);
+ pecc->stk_compat = stk_compat;
+ pecc->stk_compat_size = sizeof(stk_compat);
+}
+
+static const TypeInfo pnv_pec_type_info = {
+ .name = TYPE_PNV_PHB4_PEC,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvPhb4PecState),
+ .instance_init = pnv_pec_instance_init,
+ .class_init = pnv_pec_class_init,
+ .class_size = sizeof(PnvPhb4PecClass),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { }
+ }
+};
+
+static void pnv_pec_stk_instance_init(Object *obj)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(obj);
+
+ object_initialize_child(obj, "phb", &stack->phb, sizeof(stack->phb),
+ TYPE_PNV_PHB4, &error_abort, NULL);
+}
+
+static void pnv_pec_stk_realize(DeviceState *dev, Error **errp)
+{
+ PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev);
+ PnvPhb4PecState *pec = stack->pec;
+ char name[64];
+
+ assert(pec);
+
+ /* Initialize the XSCOM regions for the stack registers */
+ snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d",
+ pec->chip_id, pec->index, stack->stack_no);
+ pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack),
+ &pnv_pec_stk_nest_xscom_ops, stack, name,
+ PHB4_PEC_NEST_STK_REGS_COUNT);
+
+ snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d",
+ pec->chip_id, pec->index, stack->stack_no);
+ pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack),
+ &pnv_pec_stk_pci_xscom_ops, stack, name,
+ PHB4_PEC_PCI_STK_REGS_COUNT);
+
+ /* PHB pass-through */
+ snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb",
+ pec->chip_id, pec->index, stack->stack_no);
+ pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(&stack->phb),
+ &pnv_phb4_xscom_ops, &stack->phb, name, 0x40);
+
+ /*
+ * Let the machine/chip realize the PHB object to customize more
+ * easily some fields
+ */
+}
+
+static Property pnv_pec_stk_properties[] = {
+ DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0),
+ DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC,
+ PnvPhb4PecState *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_pec_stk_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ device_class_set_props(dc, pnv_pec_stk_properties);
+ dc->realize = pnv_pec_stk_realize;
+ dc->user_creatable = false;
+
+ /* TODO: reset regs ? */
+}
+
+static const TypeInfo pnv_pec_stk_type_info = {
+ .name = TYPE_PNV_PHB4_PEC_STACK,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvPhb4PecStack),
+ .instance_init = pnv_pec_stk_instance_init,
+ .class_init = pnv_pec_stk_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { }
+ }
+};
+
+static void pnv_pec_register_types(void)
+{
+ type_register_static(&pnv_pec_type_info);
+ type_register_static(&pnv_pec_stk_type_info);
+}
+
+type_init(pnv_pec_register_types);
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index e27efe9..354828b 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -135,6 +135,8 @@ config XIVE_SPAPR
default y
depends on PSERIES
select XIVE
+ select PCI
+ select PCIE_PORT
config XIVE_KVM
bool
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index e61994c..139c857 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -40,6 +40,7 @@
#include "hw/intc/intc.h"
#include "hw/ipmi/ipmi.h"
#include "target/ppc/mmu-hash64.h"
+#include "hw/pci/msi.h"
#include "hw/ppc/xics.h"
#include "hw/qdev-properties.h"
@@ -615,16 +616,29 @@ static ISABus *pnv_isa_create(PnvChip *chip, Error **errp)
static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon)
{
Pnv8Chip *chip8 = PNV8_CHIP(chip);
+ int i;
ics_pic_print_info(&chip8->psi.ics, mon);
+ for (i = 0; i < chip->num_phbs; i++) {
+ pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon);
+ ics_pic_print_info(&chip8->phbs[i].lsis, mon);
+ }
}
static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon)
{
Pnv9Chip *chip9 = PNV9_CHIP(chip);
+ int i, j;
pnv_xive_pic_print_info(&chip9->xive, mon);
pnv_psi_pic_print_info(&chip9->psi, mon);
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ PnvPhb4PecState *pec = &chip9->pecs[i];
+ for (j = 0; j < pec->num_stacks; j++) {
+ pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon);
+ }
+ }
}
static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip,
@@ -716,7 +730,7 @@ static void pnv_init(MachineState *machine)
exit(1);
}
- fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
+ fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE);
if (fw_size < 0) {
error_report("Could not load OPAL firmware '%s'", fw_filename);
exit(1);
@@ -748,6 +762,9 @@ static void pnv_init(MachineState *machine)
}
}
+ /* MSIs are supported on this platform */
+ msi_nonbroken = true;
+
/*
* Check compatibility of the specified CPU with the machine
* default.
@@ -1014,7 +1031,10 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
static void pnv_chip_power8_instance_init(Object *obj)
{
+ PnvChip *chip = PNV_CHIP(obj);
Pnv8Chip *chip8 = PNV8_CHIP(obj);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+ int i;
object_property_add_link(obj, "xics", TYPE_XICS_FABRIC,
(Object **)&chip8->xics,
@@ -1033,6 +1053,17 @@ static void pnv_chip_power8_instance_init(Object *obj)
object_initialize_child(obj, "homer", &chip8->homer, sizeof(chip8->homer),
TYPE_PNV8_HOMER, &error_abort, NULL);
+
+ for (i = 0; i < pcc->num_phbs; i++) {
+ object_initialize_child(obj, "phb[*]", &chip8->phbs[i],
+ sizeof(chip8->phbs[i]), TYPE_PNV_PHB3,
+ &error_abort, NULL);
+ }
+
+ /*
+ * Number of PHBs is the chip default
+ */
+ chip->num_phbs = pcc->num_phbs;
}
static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp)
@@ -1071,6 +1102,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
Pnv8Chip *chip8 = PNV8_CHIP(dev);
Pnv8Psi *psi8 = &chip8->psi;
Error *local_err = NULL;
+ int i;
assert(chip8->xics);
@@ -1151,6 +1183,33 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
/* Homer mmio region */
memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip),
&chip8->homer.regs);
+
+ /* PHB3 controllers */
+ for (i = 0; i < chip->num_phbs; i++) {
+ PnvPHB3 *phb = &chip8->phbs[i];
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ object_property_set_int(OBJECT(phb), i, "index", &error_fatal);
+ object_property_set_int(OBJECT(phb), chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_bool(OBJECT(phb), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ qdev_set_parent_bus(DEVICE(phb), sysbus_get_default());
+
+ /* Populate the XSCOM address space. */
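+ /*
+ * Each PBCQ occupies a window at 0x400 XSCOM intervals in the
+ * nest and PCI ranges, and at 0x40 intervals in the SPCI range,
+ * indexed by the PHB id.
+ */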
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_nest_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_pci_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id,
+ &pbcq->xscom_spci_regs);
+ }
}
static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1166,6 +1225,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */
k->cores_mask = POWER8E_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1189,6 +1249,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
k->cores_mask = POWER8_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1212,6 +1273,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */
k->cores_mask = POWER8_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1230,7 +1292,10 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
static void pnv_chip_power9_instance_init(Object *obj)
{
+ PnvChip *chip = PNV_CHIP(obj);
Pnv9Chip *chip9 = PNV9_CHIP(obj);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+ int i;
object_initialize_child(obj, "xive", &chip9->xive, sizeof(chip9->xive),
TYPE_PNV_XIVE, &error_abort, NULL);
@@ -1248,6 +1313,17 @@ static void pnv_chip_power9_instance_init(Object *obj)
object_initialize_child(obj, "homer", &chip9->homer, sizeof(chip9->homer),
TYPE_PNV9_HOMER, &error_abort, NULL);
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ object_initialize_child(obj, "pec[*]", &chip9->pecs[i],
+ sizeof(chip9->pecs[i]), TYPE_PNV_PHB4_PEC,
+ &error_abort, NULL);
+ }
+
+ /*
+ * Number of PHBs is the chip default
+ */
+ chip->num_phbs = pcc->num_phbs;
}
static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
@@ -1276,6 +1352,78 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
}
}
+static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp)
+{
+ Pnv9Chip *chip9 = PNV9_CHIP(chip);
+ Error *local_err = NULL;
+ int i, j;
+ int phb_id = 0;
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ PnvPhb4PecState *pec = &chip9->pecs[i];
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+ uint32_t pec_nest_base;
+ uint32_t pec_pci_base;
+
+ object_property_set_int(OBJECT(pec), i, "index", &error_fatal);
+ /*
+ * PEC0 -> 1 stack
+ * PEC1 -> 2 stacks
+ * PEC2 -> 3 stacks
+ */
+ object_property_set_int(OBJECT(pec), i + 1, "num-stacks",
+ &error_fatal);
+ object_property_set_int(OBJECT(pec), chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_link(OBJECT(pec), OBJECT(get_system_memory()),
+ "system-memory", &error_abort);
+ object_property_set_bool(OBJECT(pec), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pec_nest_base = pecc->xscom_nest_base(pec);
+ pec_pci_base = pecc->xscom_pci_base(pec);
+
+ pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
+ pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
+
+ for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs;
+ j++, phb_id++) {
+ PnvPhb4PecStack *stack = &pec->stacks[j];
+ Object *obj = OBJECT(&stack->phb);
+
+ object_property_set_int(obj, phb_id, "index", &error_fatal);
+ object_property_set_int(obj, chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_int(obj, PNV_PHB4_VERSION, "version",
+ &error_fatal);
+ object_property_set_int(obj, PNV_PHB4_DEVICE_ID, "device-id",
+ &error_fatal);
+ object_property_set_link(obj, OBJECT(stack), "stack", &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
+
+ /* Populate the XSCOM address space. */
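+ /*
+ * The per-stack register sets follow the PEC global registers
+ * at 0x40 XSCOM intervals; the PHB pass-through window of stack
+ * N sits at PNV9_XSCOM_PEC_PCI_STK0 + N * 0x40.
+ */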
+ pnv_xscom_add_subregion(chip,
+ pec_nest_base + 0x40 * (stack->stack_no + 1),
+ &stack->nest_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + 0x40 * (stack->stack_no + 1),
+ &stack->pci_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
+ 0x40 * stack->stack_no,
+ &stack->phb_regs_mr);
+ }
+ }
+}
+
static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
{
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
@@ -1378,6 +1526,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
/* Homer mmio region */
memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip),
&chip9->homer.regs);
+
+ /* PHBs */
+ pnv_chip_power9_phb_realize(chip, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1404,6 +1559,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
k->xscom_core_base = pnv_chip_power9_xscom_core_base;
k->xscom_pcba = pnv_chip_power9_xscom_pcba;
dc->desc = "PowerNV Chip POWER9";
+ k->num_phbs = 6;
device_class_set_parent_realize(dc, pnv_chip_power9_realize,
&k->parent_realize);
@@ -1533,6 +1689,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
const char *typename = pnv_chip_core_typename(chip);
int i, core_hwid;
+ PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
if (!object_class_by_name(typename)) {
error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
@@ -1571,6 +1728,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
object_property_set_int(OBJECT(pnv_core),
pcc->core_pir(chip, core_hwid),
"pir", &error_fatal);
+ object_property_set_int(OBJECT(pnv_core), pnv->fw_load_addr,
+ "hrmor", &error_fatal);
object_property_set_link(OBJECT(pnv_core), OBJECT(chip), "chip",
&error_abort);
object_property_set_bool(OBJECT(pnv_core), true, "realized",
@@ -1605,6 +1764,7 @@ static Property pnv_chip_properties[] = {
DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
+ DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1638,14 +1798,23 @@ PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir)
static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
{
PnvMachineState *pnv = PNV_MACHINE(xi);
- int i;
+ int i, j;
for (i = 0; i < pnv->num_chips; i++) {
+ PnvChip *chip = pnv->chips[i];
Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
if (ics_valid_irq(&chip8->psi.ics, irq)) {
return &chip8->psi.ics;
}
+ for (j = 0; j < chip->num_phbs; j++) {
+ if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) {
+ return &chip8->phbs[j].lsis;
+ }
+ if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) {
+ return ICS(&chip8->phbs[j].msis);
+ }
+ }
}
return NULL;
}
@@ -1653,11 +1822,17 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
static void pnv_ics_resend(XICSFabric *xi)
{
PnvMachineState *pnv = PNV_MACHINE(xi);
- int i;
+ int i, j;
for (i = 0; i < pnv->num_chips; i++) {
+ PnvChip *chip = pnv->chips[i];
Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
+
ics_resend(&chip8->psi.ics);
+ for (j = 0; j < chip->num_phbs; j++) {
+ ics_resend(&chip8->phbs[j].lsis);
+ ics_resend(ICS(&chip8->phbs[j].msis));
+ }
}
}
@@ -1767,6 +1942,22 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data)
pmc->dt_power_mgt = pnv_dt_power_mgt;
}
+static bool pnv_machine_get_hb(Object *obj, Error **errp)
+{
+ PnvMachineState *pnv = PNV_MACHINE(obj);
+
+ return !!pnv->fw_load_addr;
+}
+
+static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
+{
+ PnvMachineState *pnv = PNV_MACHINE(obj);
+
+ if (value) {
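+ /*
+ * Hostboot-like firmware is loaded at 128MB (0x8000000). This
+ * address also becomes the cores' HRMOR, see pnv_chip_core_realize().
+ */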
+ pnv->fw_load_addr = 0x8000000;
+ }
+}
+
static void pnv_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1786,6 +1977,13 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data)
*/
mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE;
ispc->print_info = pnv_pic_print_info;
+
+ object_class_property_add_bool(oc, "hb-mode",
+ pnv_machine_get_hb, pnv_machine_set_hb,
+ &error_abort);
+ object_class_property_set_description(oc, "hb-mode",
+ "Use a hostboot like boot loader",
+ NULL);
}
#define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 8ca5fbd..2345620 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -40,11 +40,11 @@ static const char *pnv_core_cpu_typename(PnvCore *pc)
return cpu_type;
}
-static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
cpu_reset(cs);
@@ -56,7 +56,9 @@ static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip)
env->nip = 0x10;
env->msr |= MSR_HVB; /* Hypervisor mode */
- pcc->intc_reset(chip, cpu);
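+ /* HRMOR is taken from the "hrmor" core property, i.e. the firmware load address */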
+ env->spr[SPR_HRMOR] = pc->hrmor;
+
+ pcc->intc_reset(pc->chip, cpu);
}
/*
@@ -162,14 +164,14 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = {
.endianness = DEVICE_BIG_ENDIAN,
};
-static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp)
+static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
{
CPUPPCState *env = &cpu->env;
int core_pir;
int thread_index = 0; /* TODO: TCG supports only one thread */
ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
Error *local_err = NULL;
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
if (local_err) {
@@ -177,13 +179,13 @@ static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp)
return;
}
- pcc->intc_create(chip, cpu, &local_err);
+ pcc->intc_create(pc->chip, cpu, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort);
+ core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort);
/*
* The PIR of a thread is the core PIR + the thread index. We will
@@ -203,7 +205,7 @@ static void pnv_core_reset(void *dev)
int i;
for (i = 0; i < cc->nr_threads; i++) {
- pnv_core_cpu_reset(pc->threads[i], pc->chip);
+ pnv_core_cpu_reset(pc, pc->threads[i]);
}
}
@@ -231,8 +233,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
snprintf(name, sizeof(name), "thread[%d]", i);
object_property_add_child(OBJECT(pc), name, obj, &error_abort);
- object_property_add_alias(obj, "core-pir", OBJECT(pc),
- "pir", &error_abort);
cpu->machine_data = g_new0(PnvCPUState, 1);
@@ -240,7 +240,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
}
for (j = 0; j < cc->nr_threads; j++) {
- pnv_core_cpu_realize(pc->threads[j], pc->chip, &local_err);
+ pnv_core_cpu_realize(pc, pc->threads[j], &local_err);
if (local_err) {
goto err;
}
@@ -263,12 +263,12 @@ err:
error_propagate(errp, local_err);
}
-static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu)
{
PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
- pcc->intc_destroy(chip, cpu);
+ pcc->intc_destroy(pc->chip, cpu);
cpu_remove_sync(CPU(cpu));
cpu->machine_data = NULL;
g_free(pnv_cpu);
@@ -284,13 +284,14 @@ static void pnv_core_unrealize(DeviceState *dev, Error **errp)
qemu_unregister_reset(pnv_core_reset, pc);
for (i = 0; i < cc->nr_threads; i++) {
- pnv_core_cpu_unrealize(pc->threads[i], pc->chip);
+ pnv_core_cpu_unrealize(pc, pc->threads[i]);
}
g_free(pc->threads);
}
static Property pnv_core_properties[] = {
DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
+ DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0),
DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
DEFINE_PROP_END_OF_LIST(),
};
@@ -324,6 +325,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data)
dc->realize = pnv_core_realize;
dc->unrealize = pnv_core_unrealize;
device_class_set_props(dc, pnv_core_properties);
+ dc->user_creatable = false;
}
#define DEFINE_PNV_CORE_TYPE(family, cpu_model) \
@@ -422,6 +424,7 @@ static void pnv_quad_class_init(ObjectClass *oc, void *data)
dc->realize = pnv_quad_realize;
device_class_set_props(dc, pnv_quad_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_quad_info = {
diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
index 93ae42f..9a26262 100644
--- a/hw/ppc/pnv_homer.c
+++ b/hw/ppc/pnv_homer.c
@@ -360,6 +360,7 @@ static void pnv_homer_class_init(ObjectClass *klass, void *data)
dc->realize = pnv_homer_realize;
dc->desc = "PowerNV HOMER Memory";
device_class_set_props(dc, pnv_homer_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_homer_type_info = {
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 22b2055..5989d72 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -762,6 +762,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data)
dc->realize = pnv_lpc_realize;
dc->desc = "PowerNV LPC Controller";
device_class_set_props(dc, pnv_lpc_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_lpc_info = {
@@ -825,6 +826,7 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp)
qemu_irq *irqs;
qemu_irq_handler handler;
PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+ bool hostboot_mode = !!pnv->fw_load_addr;
/* let isa_bus_new() create its own bridge on SysBus otherwise
* devices specified on the command line won't find the bus and
@@ -859,7 +861,9 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp)
* Start disabled. The HIOMAP protocol will activate the mapping
* with HIOMAP_C_CREATE_WRITE_WINDOW
*/
- memory_region_set_enabled(&pnv->pnor->mmio, false);
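+ /* A hostboot-like firmware expects the PNOR mapping to be enabled at boot */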
+ if (!hostboot_mode) {
+ memory_region_set_enabled(&pnv->pnor->mmio, false);
+ }
return isa_bus;
}
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index 2173fac..5a716c2 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -280,6 +280,7 @@ static void pnv_occ_class_init(ObjectClass *klass, void *data)
dc->realize = pnv_occ_realize;
dc->desc = "PowerNV OCC Controller";
device_class_set_props(dc, pnv_occ_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_occ_type_info = {
diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c
index f761d8d..c365ee5 100644
--- a/hw/ppc/pnv_pnor.c
+++ b/hw/ppc/pnv_pnor.c
@@ -11,6 +11,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
+#include "qemu/units.h"
#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "hw/loader.h"
@@ -46,7 +47,8 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size)
ret = blk_pwrite(s->blk, offset, s->storage + offset,
offset_end - offset, 0);
if (ret < 0) {
- error_report("Could not update PNOR: %s", strerror(-ret));
+ error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset,
+ strerror(-ret));
}
}
@@ -111,7 +113,7 @@ static void pnv_pnor_realize(DeviceState *dev, Error **errp)
}
static Property pnv_pnor_properties[] = {
- DEFINE_PROP_INT64("size", PnvPnor, size, 128 << 20),
+ DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB),
DEFINE_PROP_DRIVE("drive", PnvPnor, blk),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 4c5fa29..4a11fb1 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1490,24 +1490,6 @@ int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn),
}
/*****************************************************************************/
-/* Debug port */
-void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val)
-{
- addr &= 0xF;
- switch (addr) {
- case 0:
- printf("%c", val);
- break;
- case 1:
- printf("\n");
- fflush(stdout);
- break;
- case 2:
- printf("Set loglevel to %04" PRIx32 "\n", val);
- qemu_set_log(val | 0x100);
- break;
- }
-}
int ppc_cpu_pir(PowerPCCPU *cpu)
{
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 862345c..111cc80 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -42,7 +42,7 @@
#include "hw/loader.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/isa/pc87312.h"
-#include "hw/net/ne2000-isa.h"
+#include "hw/qdev-properties.h"
#include "sysemu/arch_init.h"
#include "sysemu/kvm.h"
#include "sysemu/qtest.h"
@@ -60,178 +60,9 @@
#define CFG_ADDR 0xf0000510
-#define BIOS_SIZE (1 * MiB)
-#define BIOS_FILENAME "ppc_rom.bin"
#define KERNEL_LOAD_ADDR 0x01000000
#define INITRD_LOAD_ADDR 0x01800000
-/* Constants for devices init */
-static const int ide_iobase[2] = { 0x1f0, 0x170 };
-static const int ide_iobase2[2] = { 0x3f6, 0x376 };
-static const int ide_irq[2] = { 13, 13 };
-
-#define NE2000_NB_MAX 6
-
-static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
-static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
-
-/* ISA IO ports bridge */
-#define PPC_IO_BASE 0x80000000
-
-/* Fake super-io ports for PREP platform (Intel 82378ZB) */
-typedef struct sysctrl_t {
- qemu_irq reset_irq;
- Nvram *nvram;
- uint8_t state;
- uint8_t syscontrol;
- int contiguous_map;
- qemu_irq contiguous_map_irq;
- int endian;
-} sysctrl_t;
-
-enum {
- STATE_HARDFILE = 0x01,
-};
-
-static sysctrl_t *sysctrl;
-
-static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
-{
- sysctrl_t *sysctrl = opaque;
-
- trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
- switch (addr) {
- case 0x0092:
- /* Special port 92 */
- /* Check soft reset asked */
- if (val & 0x01) {
- qemu_irq_raise(sysctrl->reset_irq);
- } else {
- qemu_irq_lower(sysctrl->reset_irq);
- }
- /* Check LE mode */
- if (val & 0x02) {
- sysctrl->endian = 1;
- } else {
- sysctrl->endian = 0;
- }
- break;
- case 0x0800:
- /* Motorola CPU configuration register : read-only */
- break;
- case 0x0802:
- /* Motorola base module feature register : read-only */
- break;
- case 0x0803:
- /* Motorola base module status register : read-only */
- break;
- case 0x0808:
- /* Hardfile light register */
- if (val & 1)
- sysctrl->state |= STATE_HARDFILE;
- else
- sysctrl->state &= ~STATE_HARDFILE;
- break;
- case 0x0810:
- /* Password protect 1 register */
- if (sysctrl->nvram != NULL) {
- NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
- (k->toggle_lock)(sysctrl->nvram, 1);
- }
- break;
- case 0x0812:
- /* Password protect 2 register */
- if (sysctrl->nvram != NULL) {
- NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
- (k->toggle_lock)(sysctrl->nvram, 2);
- }
- break;
- case 0x0814:
- /* L2 invalidate register */
- // tlb_flush(first_cpu, 1);
- break;
- case 0x081C:
- /* system control register */
- sysctrl->syscontrol = val & 0x0F;
- break;
- case 0x0850:
- /* I/O map type register */
- sysctrl->contiguous_map = val & 0x01;
- qemu_set_irq(sysctrl->contiguous_map_irq, sysctrl->contiguous_map);
- break;
- default:
- printf("ERROR: unaffected IO port write: %04" PRIx32
- " => %02" PRIx32"\n", addr, val);
- break;
- }
-}
-
-static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
-{
- sysctrl_t *sysctrl = opaque;
- uint32_t retval = 0xFF;
-
- switch (addr) {
- case 0x0092:
- /* Special port 92 */
- retval = sysctrl->endian << 1;
- break;
- case 0x0800:
- /* Motorola CPU configuration register */
- retval = 0xEF; /* MPC750 */
- break;
- case 0x0802:
- /* Motorola Base module feature register */
- retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
- break;
- case 0x0803:
- /* Motorola base module status register */
- retval = 0xE0; /* Standard MPC750 */
- break;
- case 0x080C:
- /* Equipment present register:
- * no L2 cache
- * no upgrade processor
- * no cards in PCI slots
- * SCSI fuse is bad
- */
- retval = 0x3C;
- break;
- case 0x0810:
- /* Motorola base module extended feature register */
- retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
- break;
- case 0x0814:
- /* L2 invalidate: don't care */
- break;
- case 0x0818:
- /* Keylock */
- retval = 0x00;
- break;
- case 0x081C:
- /* system control register
- * 7 - 6 / 1 - 0: L2 cache enable
- */
- retval = sysctrl->syscontrol;
- break;
- case 0x0823:
- /* */
- retval = 0x03; /* no L2 cache */
- break;
- case 0x0850:
- /* I/O map type register */
- retval = sysctrl->contiguous_map;
- break;
- default:
- printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
- break;
- }
- trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
-
- return retval;
-}
-
-
#define NVRAM_SIZE 0x2000
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
@@ -247,17 +78,6 @@ static void ppc_prep_reset(void *opaque)
cpu_reset(CPU(cpu));
}
-static const MemoryRegionPortio prep_portio_list[] = {
- /* System control ports */
- { 0x0092, 1, 1, .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
- { 0x0800, 0x52, 1,
- .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
- /* Special port to get debug messages from Open-Firmware */
- { 0x0F00, 4, 1, .write = PPC_debug_write, },
- PORTIO_END_OF_LIST(),
-};
-
-static PortioList prep_port_list;
/*****************************************************************************/
/* NVRAM helpers */
@@ -397,207 +217,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size,
return 0;
}
-/* PowerPC PREP hardware initialisation */
-static void ppc_prep_init(MachineState *machine)
-{
- ram_addr_t ram_size = machine->ram_size;
- const char *kernel_filename = machine->kernel_filename;
- const char *kernel_cmdline = machine->kernel_cmdline;
- const char *initrd_filename = machine->initrd_filename;
- const char *boot_device = machine->boot_order;
- MemoryRegion *sysmem = get_system_memory();
- PowerPCCPU *cpu = NULL;
- CPUPPCState *env = NULL;
- Nvram *m48t59;
-#if 0
- MemoryRegion *xcsr = g_new(MemoryRegion, 1);
-#endif
- int linux_boot, i, nb_nics1;
- MemoryRegion *ram = g_new(MemoryRegion, 1);
- uint32_t kernel_base, initrd_base;
- long kernel_size, initrd_size;
- DeviceState *dev;
- PCIHostState *pcihost;
- PCIBus *pci_bus;
- PCIDevice *pci;
- ISABus *isa_bus;
- ISADevice *isa;
- int ppc_boot_device;
- DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
-
- sysctrl = g_malloc0(sizeof(sysctrl_t));
-
- linux_boot = (kernel_filename != NULL);
-
- /* init CPUs */
- for (i = 0; i < machine->smp.cpus; i++) {
- cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
- env = &cpu->env;
-
- if (env->flags & POWERPC_FLAG_RTC_CLK) {
- /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
- cpu_ppc_tb_init(env, 7812500UL);
- } else {
- /* Set time-base frequency to 100 Mhz */
- cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
- }
- qemu_register_reset(ppc_prep_reset, cpu);
- }
-
- /* allocate RAM */
- memory_region_allocate_system_memory(ram, NULL, "ppc_prep.ram", ram_size);
- memory_region_add_subregion(sysmem, 0, ram);
-
- if (linux_boot) {
- kernel_base = KERNEL_LOAD_ADDR;
- /* now we can load the kernel */
- kernel_size = load_image_targphys(kernel_filename, kernel_base,
- ram_size - kernel_base);
- if (kernel_size < 0) {
- error_report("could not load kernel '%s'", kernel_filename);
- exit(1);
- }
- /* load initrd */
- if (initrd_filename) {
- initrd_base = INITRD_LOAD_ADDR;
- initrd_size = load_image_targphys(initrd_filename, initrd_base,
- ram_size - initrd_base);
- if (initrd_size < 0) {
- error_report("could not load initial ram disk '%s'",
- initrd_filename);
- exit(1);
- }
- } else {
- initrd_base = 0;
- initrd_size = 0;
- }
- ppc_boot_device = 'm';
- } else {
- kernel_base = 0;
- kernel_size = 0;
- initrd_base = 0;
- initrd_size = 0;
- ppc_boot_device = '\0';
- /* For now, OHW cannot boot from the network. */
- for (i = 0; boot_device[i] != '\0'; i++) {
- if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
- ppc_boot_device = boot_device[i];
- break;
- }
- }
- if (ppc_boot_device == '\0') {
- error_report("No valid boot device for Mac99 machine");
- exit(1);
- }
- }
-
- if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
- error_report("Only 6xx bus is supported on PREP machine");
- exit(1);
- }
-
- dev = qdev_create(NULL, "raven-pcihost");
- if (bios_name == NULL) {
- bios_name = BIOS_FILENAME;
- }
- qdev_prop_set_string(dev, "bios-name", bios_name);
- qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
- qdev_prop_set_bit(dev, "is-legacy-prep", true);
- pcihost = PCI_HOST_BRIDGE(dev);
- object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL);
- qdev_init_nofail(dev);
- pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
- if (pci_bus == NULL) {
- error_report("Couldn't create PCI host controller");
- exit(1);
- }
- sysctrl->contiguous_map_irq = qdev_get_gpio_in(dev, 0);
-
- /* PCI -> ISA bridge */
- pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
- cpu = POWERPC_CPU(first_cpu);
- qdev_connect_gpio_out(&pci->qdev, 0,
- cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
- sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
- sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
- sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
- sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11));
- isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci), "isa.0"));
-
- /* Super I/O (parallel + serial ports) */
- isa = isa_create(isa_bus, TYPE_PC87312_SUPERIO);
- dev = DEVICE(isa);
- qdev_prop_set_uint8(dev, "config", 13); /* fdc, ser0, ser1, par0 */
- qdev_init_nofail(dev);
-
- /* init basic PC hardware */
- pci_vga_init(pci_bus);
-
- nb_nics1 = nb_nics;
- if (nb_nics1 > NE2000_NB_MAX)
- nb_nics1 = NE2000_NB_MAX;
- for(i = 0; i < nb_nics1; i++) {
- if (nd_table[i].model == NULL) {
- nd_table[i].model = g_strdup("ne2k_isa");
- }
- if (strcmp(nd_table[i].model, "ne2k_isa") == 0) {
- isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i],
- &nd_table[i]);
- } else {
- pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
- }
- }
-
- ide_drive_get(hd, ARRAY_SIZE(hd));
- for(i = 0; i < MAX_IDE_BUS; i++) {
- isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i],
- hd[2 * i],
- hd[2 * i + 1]);
- }
-
- cpu = POWERPC_CPU(first_cpu);
- sysctrl->reset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET];
-
- portio_list_init(&prep_port_list, NULL, prep_portio_list, sysctrl, "prep");
- portio_list_add(&prep_port_list, isa_address_space_io(isa), 0x0);
-
- /*
- * PowerPC control and status register group: unimplemented,
- * would be at address 0xFEFF0000.
- */
-
- if (machine_usb(machine)) {
- pci_create_simple(pci_bus, -1, "pci-ohci");
- }
-
- m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 2000, 59);
- if (m48t59 == NULL)
- return;
- sysctrl->nvram = m48t59;
-
- /* Initialise NVRAM */
- PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", ram_size,
- ppc_boot_device,
- kernel_base, kernel_size,
- kernel_cmdline,
- initrd_base, initrd_size,
- /* XXX: need an option to load a NVRAM image */
- 0,
- graphic_width, graphic_height, graphic_depth);
-}
-
-static void prep_machine_init(MachineClass *mc)
-{
- mc->deprecation_reason = "use 40p machine type instead";
- mc->desc = "PowerPC PREP platform";
- mc->init = ppc_prep_init;
- mc->block_default_type = IF_IDE;
- mc->max_cpus = MAX_CPUS;
- mc->default_boot_order = "cad";
- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("602");
- mc->default_display = "std";
-}
-
static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
{
uint16_t checksum = *(uint16_t *)opaque;
@@ -821,4 +440,3 @@ static void ibm_40p_machine_init(MachineClass *mc)
}
DEFINE_MACHINE("40p", ibm_40p_machine_init)
-DEFINE_MACHINE("prep", prep_machine_init)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a0076e5..c9b2e0a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
#include "migration/qemu-file-types.h"
#include "migration/global_state.h"
#include "migration/register.h"
+#include "migration/blocker.h"
#include "mmu-hash64.h"
#include "mmu-book3s-v3.h"
#include "cpu-models.h"
@@ -1677,6 +1678,14 @@ static void spapr_machine_reset(MachineState *machine)
first_ppc_cpu->env.gpr[5] = 0;
spapr->cas_reboot = false;
+
+ spapr->mc_status = -1;
+ spapr->guest_machine_check_addr = -1;
+
+ /* Signal all vCPUs waiting on this condition */
+ qemu_cond_broadcast(&spapr->mc_delivery_cond);
+
+ migrate_del_blocker(spapr->fwnmi_migration_blocker);
}
static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1959,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = {
},
};
+static bool spapr_fwnmi_needed(void *opaque)
+{
+ SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+ return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+ SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+ /*
+ * Check if machine check handling is in progress and print a
+ * warning message.
+ */
+ if (spapr->mc_status != -1) {
+ warn_report("A machine check is being handled during migration. The"
+ "handler may run and log hardware error on the destination");
+ }
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+ .name = "spapr_machine_check",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_fwnmi_needed,
+ .pre_save = spapr_fwnmi_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+ VMSTATE_INT32(mc_status, SpaprMachineState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_spapr = {
.name = "spapr",
.version_id = 3,
@@ -1992,6 +2037,8 @@ static const VMStateDescription vmstate_spapr = {
&vmstate_spapr_dtb,
&vmstate_spapr_cap_large_decr,
&vmstate_spapr_cap_ccf_assist,
+ &vmstate_spapr_cap_fwnmi,
+ &vmstate_spapr_machine_check,
NULL
}
};
@@ -2807,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine)
spapr_create_lmb_dr_connectors(spapr);
}
+ if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+ /* Create the error string for live migration blocker */
+ error_setg(&spapr->fwnmi_migration_blocker,
+ "A machine check is being handled during migration. The handler"
+ "may run and log hardware error on the destination");
+ }
+
/* Set up RTAS event infrastructure */
spapr_events_init(spapr);
@@ -2970,6 +3024,8 @@ static void spapr_machine_init(MachineState *machine)
kvmppc_spapr_enable_inkernel_multitce();
}
+
+ qemu_cond_init(&spapr->mc_delivery_cond);
}
static int spapr_kvm_type(MachineState *machine, const char *vm_type)
@@ -4397,7 +4453,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
spapr_caps_add_properties(smc, &error_abort);
smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
@@ -4465,8 +4522,12 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
*/
static void spapr_machine_4_2_class_options(MachineClass *mc)
{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
spapr_machine_5_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
}
DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2..8b27d3a 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -485,17 +485,48 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
if (tcg_enabled() && val) {
- /* TODO - for now only allow broken for TCG */
- error_setg(errp,
-"Requested count cache flush assist capability level not supported by tcg,"
- " try appending -machine cap-ccf-assist=off");
+ /* TCG doesn't implement anything here, but allow with a warning */
+ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
} else if (kvm_enabled() && (val > kvm_val)) {
+ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+
+ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+ /*
+ * If we don't have CCF assist on the host, the assist
+ * instruction is a harmless no-op. It won't correctly
+ * implement the cache count flush *but* if we have
+ * count-cache-disabled in the host, that flush is
+ * unnecessary. So, specifically allow this case. This
+ * allows us to have better performance on POWER9 DD2.3,
+ * while still working on POWER9 DD2.2 and POWER8 host
+ * cpus.
+ */
+ return;
+ }
error_setg(errp,
"Requested count cache flush assist capability level not supported by kvm,"
" try appending -machine cap-ccf-assist=off");
}
}
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+ Error **errp)
+{
+ if (!val) {
+ return; /* Disabled by default */
+ }
+
+ if (tcg_enabled()) {
+ warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not "
+ "supported in TCG");
+ } else if (kvm_enabled()) {
+ if (kvmppc_set_fwnmi() < 0) {
+ error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
+ "not supported by KVM");
+ }
+ }
+}
+
SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
[SPAPR_CAP_HTM] = {
.name = "htm",
@@ -595,6 +626,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
.type = "bool",
.apply = cap_ccf_assist_apply,
},
+ [SPAPR_CAP_FWNMI_MCE] = {
+ .name = "fwnmi-mce",
+ .description = "Handle fwnmi machine check exceptions",
+ .index = SPAPR_CAP_FWNMI_MCE,
+ .get = spapr_cap_get_bool,
+ .set = spapr_cap_set_bool,
+ .type = "bool",
+ .apply = cap_fwnmi_mce_apply,
+ },
};
static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +774,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
void spapr_caps_init(SpaprMachineState *spapr)
{
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e00..884e455 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,8 +40,10 @@
#include "hw/ppc/spapr_drc.h"
#include "qemu/help_option.h"
#include "qemu/bcd.h"
+#include "qemu/main-loop.h"
#include "hw/ppc/spapr_ovec.h"
#include <libfdt.h>
+#include "migration/blocker.h"
#define RTAS_LOG_VERSION_MASK 0xff000000
#define RTAS_LOG_VERSION_6 0x06000000
@@ -213,6 +215,104 @@ struct hp_extended_log {
struct rtas_event_log_v6_hp hp;
} QEMU_PACKED;
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */
+ struct rtas_event_log_v6_section_header hdr;
+ uint32_t fru_id;
+ uint32_t proc_id;
+ uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE 0
+#define RTAS_LOG_V6_MC_TYPE_SLB 1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB 4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7
+ uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY 0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY 1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3
+#define RTAS_LOG_V6_MC_TLB_PARITY 1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+ struct rtas_event_log_v6 v6hdr;
+ struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
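+/*
+ * Instruction-side machine check causes. The first entry whose
+ * (SRR1 & srr1_mask) equals srr1_value describes the error.
+ */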
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
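+/*
+ * Data-side machine check causes. The first entry whose DSISR bit
+ * is set in the faulting DSISR describes the error.
+ */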
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x00008000, false,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00004000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000800, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000400, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000080, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000100, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
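+/*
+ * SRR1[42] (IBM bit numbering) is set when the machine check was
+ * raised on the load/store side rather than on instruction fetch.
+ */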
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
typedef enum EventClass {
EVENT_CLASS_INTERNAL_ERRORS = 0,
EVENT_CLASS_EPOW = 1,
@@ -622,6 +722,175 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
}
+static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
+ struct mc_extended_log *ext_elog)
+{
+ int i;
+ CPUPPCState *env = &cpu->env;
+ uint32_t summary;
+ uint64_t dsisr = env->spr[SPR_DSISR];
+
+ summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
+ if (recovered) {
+ summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
+ } else {
+ summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
+ }
+
+ if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
+ for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) {
+ if (!(dsisr & mc_derror_table[i].dsisr_value)) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_derror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
+ if (mc_derror_table[i].dar_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+ }
+
+ summary |= mc_derror_table[i].initiator
+ | mc_derror_table[i].severity;
+
+ return summary;
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) {
+ if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
+ mc_ierror_table[i].srr1_value) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_ierror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
+ if (mc_ierror_table[i].nip_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+ }
+
+ summary |= mc_ierror_table[i].initiator
+ | mc_ierror_table[i].severity;
+
+ return summary;
+ }
+ }
+
+ summary |= RTAS_LOG_INITIATOR_CPU;
+ return summary;
+}
+
+static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ CPUState *cs = CPU(cpu);
+ uint64_t rtas_addr;
+ CPUPPCState *env = &cpu->env;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ target_ulong msr = 0;
+ struct rtas_error_log log;
+ struct mc_extended_log *ext_elog;
+ uint32_t summary;
+
+ /*
+ * Properly set bits in MSR before we invoke the handler.
+ * SRR0/1, DAR and DSISR are properly set by KVM
+ */
+ if (!(*pcc->interrupts_big_endian)(cpu)) {
+ msr |= (1ULL << MSR_LE);
+ }
+
+ if (env->msr & (1ULL << MSR_SF)) {
+ msr |= (1ULL << MSR_SF);
+ }
+
+ msr |= (1ULL << MSR_ME);
+
+ ext_elog = g_malloc0(sizeof(*ext_elog));
+ summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
+
+ log.summary = cpu_to_be32(summary);
+ log.extended_length = cpu_to_be32(sizeof(*ext_elog));
+
+ spapr_init_v6hdr(&ext_elog->v6hdr);
+ ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
+ ext_elog->mc.hdr.section_length =
+ cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
+ ext_elog->mc.hdr.section_version = 1;
+
+ /* get rtas addr from fdt */
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ /* Unable to fetch rtas_addr. Hence reset the guest */
+ ppc_cpu_do_system_reset(cs);
+ g_free(ext_elog);
+ return;
+ }
+
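+ /*
+ * Lay out the error log at the FWNMI area: the original r3, then
+ * the fixed log header, then the extended log. r3 is then pointed
+ * at the log before entering the guest handler.
+ */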
+ stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
+ env->gpr[3]);
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]), &log, sizeof(log));
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]) + sizeof(log), ext_elog,
+ sizeof(*ext_elog));
+
+ env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
+ env->msr = msr;
+ env->nip = spapr->guest_machine_check_addr;
+
+ g_free(ext_elog);
+}
+
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ CPUState *cs = CPU(cpu);
+ int ret;
+ Error *local_err = NULL;
+
+ if (spapr->guest_machine_check_addr == -1) {
+ /*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+ cs->exception_index = POWERPC_EXCP_MCHECK;
+ ppc_cpu_do_interrupt(cs);
+ return;
+ }
+
+ while (spapr->mc_status != -1) {
+ /*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+ if (spapr->mc_status == cpu->vcpu_id) {
+ qemu_system_guest_panicked(NULL);
+ return;
+ }
+ qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+ /* Meanwhile if the system is reset, then just return */
+ if (spapr->guest_machine_check_addr == -1) {
+ return;
+ }
+ }
+
+ ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+ if (ret == -EBUSY) {
+ /*
+ * We don't want to abort, so we let the migration continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+ warn_report("Received a fwnmi while migration was in progress");
+ }
+
+ spapr->mc_status = cpu->vcpu_id;
+ spapr_mce_dispatch_elog(cpu, recovered);
+}
+
static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f1799b1..b8bb66b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1676,6 +1676,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
Error *local_err = NULL;
bool raw_mode_supported = false;
bool guest_xive;
+ CPUState *cs;
+
+ /* CAS is supposed to be called early when only the boot vCPU is active. */
+ CPU_FOREACH(cs) {
+ if (cs == CPU(cpu)) {
+ continue;
+ }
+ if (!cs->halted) {
+ warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
+ return H_MULTI_THREADS_ACTIVE;
+ }
+ }
cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
if (local_err) {
@@ -1703,7 +1715,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
ov_table = addr;
ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
+ if (!ov1_guest) {
+ warn_report("guest didn't provide option vector 1");
+ return H_PARAMETER;
+ }
ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+ if (!ov5_guest) {
+ warn_report("guest didn't provide option vector 5");
+ return H_PARAMETER;
+ }
if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
error_report("guest requested hash and radix MMU, which is invalid.");
exit(EXIT_FAILURE);
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 85135e0..883fe28 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
#include "hw/ppc/fdt.h"
#include "target/ppc/mmu-hash64.h"
#include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -399,6 +400,62 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
rtas_st(rets, 1, 100);
}
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ hwaddr rtas_addr;
+
+ if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ spapr->guest_machine_check_addr = rtas_ld(args, 1);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ if (spapr->guest_machine_check_addr == -1) {
+ /* NMI register not called */
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ if (spapr->mc_status != cpu->vcpu_id) {
+ /* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ /*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+ spapr->mc_status = -1;
+ qemu_cond_signal(&spapr->mc_delivery_cond);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ migrate_del_blocker(spapr->fwnmi_migration_blocker);
+}
+
static struct rtas_call {
const char *name;
spapr_rtas_fn fn;
@@ -476,6 +533,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas)
}
}
+hwaddr spapr_get_rtas_addr(void)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ int rtas_node;
+ const fdt32_t *rtas_data;
+ void *fdt = spapr->fdt_blob;
+
+ /* fetch rtas addr from fdt */
+ rtas_node = fdt_path_offset(fdt, "/rtas");
+ if (rtas_node < 0) {
+ return 0;
+ }
+
+ rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL);
+ if (!rtas_data) {
+ return 0;
+ }
+
+ /*
+ * We assume that the OS called RTAS instantiate-rtas, but some other
+ * OS might call RTAS instantiate-rtas-64 instead. This is fine for now,
+ * as SLOF only supports the 32-bit variant.
+ */
+ return (hwaddr)fdt32_to_cpu(*rtas_data);
+}
+
static void core_rtas_register_types(void)
{
spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
@@ -501,6 +584,10 @@ static void core_rtas_register_types(void)
rtas_set_power_level);
spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
rtas_get_power_level);
+ spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+ rtas_ibm_nmi_register);
+ spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+ rtas_ibm_nmi_interlock);
}
type_init(core_rtas_register_types)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index f14944e..0b085ea 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -87,6 +87,7 @@ static int vio_make_devnode(SpaprVioDevice *dev,
SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
int vdevice_off, node_off, ret;
char *dt_name;
+ const char *dt_compatible;
vdevice_off = fdt_path_offset(fdt, "/vdevice");
if (vdevice_off < 0) {
@@ -113,9 +114,15 @@ static int vio_make_devnode(SpaprVioDevice *dev,
}
}
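+ /* Prefer a "compatible" string computed by the device class, when provided */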
- if (pc->dt_compatible) {
+ if (pc->get_dt_compatible) {
+ dt_compatible = pc->get_dt_compatible(dev);
+ } else {
+ dt_compatible = pc->dt_compatible;
+ }
+
+ if (dt_compatible) {
ret = fdt_setprop_string(fdt, node_off, "compatible",
- pc->dt_compatible);
+ dt_compatible);
if (ret < 0) {
return ret;
}
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 7526947..91dd00e 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -89,10 +89,7 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env,
tlb->PID = 0;
}
-static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
- int do_init,
- const char *cpu_type,
- uint32_t sysclk)
+static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk)
{
PowerPCCPU *cpu;
CPUPPCState *env;
@@ -213,7 +210,7 @@ static void virtex_init(MachineState *machine)
int i;
/* init CPUs */
- cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_type, 400000000);
+ cpu = ppc440_init_xilinx(machine->cpu_type, 400000000);
env = &cpu->env;
if (env->mmu_model != POWERPC_MMU_BOOKE) {
diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index 4c8ee87..9e67d99 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -22,3 +22,9 @@ config TPM_EMULATOR
bool
default y
depends on TPMDEV
+
+config TPM_SPAPR
+ bool
+ default y
+ depends on TPM && PSERIES
+ select TPMDEV
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index de0b85d..85eb99a 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o
common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o
+obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o
diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c
new file mode 100644
index 0000000..ce65eb2
--- /dev/null
+++ b/hw/tpm/tpm_spapr.c
@@ -0,0 +1,429 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtual TPM
+ *
+ * Copyright (c) 2015, 2017, 2019 IBM Corporation.
+ *
+ * Authors:
+ * Stefan Berger <stefanb@linux.vnet.ibm.com>
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+
+#include "sysemu/tpm_backend.h"
+#include "tpm_int.h"
+#include "tpm_util.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "trace.h"
+
+#define DEBUG_SPAPR 0
+
+#define VIO_SPAPR_VTPM(obj) \
+ OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR)
+
+typedef struct TpmCrq {
+ uint8_t valid; /* 0x80: cmd; 0xc0: init crq */
+ /* 0x81-0x83: CRQ message response */
+ uint8_t msg; /* see below */
+ uint16_t len; /* len of TPM request; len of TPM response */
+ uint32_t data; /* rtce_dma_handle when sending TPM request */
+ uint64_t reserved;
+} TpmCrq;
+
+#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0
+#define SPAPR_VTPM_VALID_COMMAND 0x80
+#define SPAPR_VTPM_MSG_RESULT 0x80
+
+/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */
+#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1
+#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2
+
+/* msg types for valid = SPAPR_VTPM_VALID_CMD */
+#define SPAPR_VTPM_GET_VERSION 0x1
+#define SPAPR_VTPM_TPM_COMMAND 0x2
+#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3
+#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4
+
+/* response error messages */
+#define SPAPR_VTPM_VTPM_ERROR 0xff
+
+/* error codes */
+#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3
+#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4
+
+#define TPM_SPAPR_BUFFER_MAX 4096
+
+typedef struct {
+ SpaprVioDevice vdev;
+
+ TpmCrq crq; /* track single TPM command */
+
+ uint8_t state;
+#define SPAPR_VTPM_STATE_NONE 0
+#define SPAPR_VTPM_STATE_EXECUTION 1
+#define SPAPR_VTPM_STATE_COMPLETION 2
+
+ unsigned char *buffer;
+
+ uint32_t numbytes; /* number of bytes to deliver on resume */
+
+ TPMBackendCmd cmd;
+
+ TPMBackend *be_driver;
+ TPMVersion be_tpm_version;
+
+ size_t be_buffer_size;
+} SpaprTpmState;
+
+/*
+ * Send a request to the TPM.
+ */
+static void tpm_spapr_tpm_send(SpaprTpmState *s)
+{
+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+ }
+
+ s->state = SPAPR_VTPM_STATE_EXECUTION;
+ s->cmd = (TPMBackendCmd) {
+ .locty = 0,
+ .in = s->buffer,
+ .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size),
+ .out = s->buffer,
+ .out_len = s->be_buffer_size,
+ };
+
+ tpm_backend_deliver_request(s->be_driver, &s->cmd);
+}
+
+static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr)
+{
+ long rc;
+
+ /* a max. of be_buffer_size bytes can be transported */
+ rc = spapr_vio_dma_read(&s->vdev, dataptr,
+ s->buffer, s->be_buffer_size);
+ if (rc) {
+ error_report("tpm_spapr_got_payload: DMA read failure");
+ }
+ /* let vTPM handle any malformed request */
+ tpm_spapr_tpm_send(s);
+
+ return rc;
+}
+
+static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq)
+{
+ return spapr_vio_send_crq(dev, (uint8_t *)crq);
+}
+
+static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+ TpmCrq local_crq;
+ TpmCrq *crq = &s->crq; /* requests only */
+ int rc;
+ uint8_t valid = crq_data[0];
+ uint8_t msg = crq_data[1];
+
+ trace_tpm_spapr_do_crq(valid, msg);
+
+ switch (valid) {
+ case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */
+
+ /* Respond to initialization request */
+ switch (msg) {
+ case SPAPR_VTPM_INIT_CRQ_RESULT:
+ trace_tpm_spapr_do_crq_crq_result();
+ memset(&local_crq, 0, sizeof(local_crq));
+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+ local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT:
+ trace_tpm_spapr_do_crq_crq_complete_result();
+ memset(&local_crq, 0, sizeof(local_crq));
+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+ local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+ }
+
+ break;
+ case SPAPR_VTPM_VALID_COMMAND: /* Payloads */
+ switch (msg) {
+ case SPAPR_VTPM_TPM_COMMAND:
+ trace_tpm_spapr_do_crq_tpm_command();
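+            /* only one TPM command may be in flight at a time */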
+ if (s->state == SPAPR_VTPM_STATE_EXECUTION) {
+ return H_BUSY;
+ }
+ memcpy(crq, crq_data, sizeof(*crq));
+
+ rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data));
+
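+            /*
+             * On success the backend now owns the command; the response
+             * CRQ is sent from tpm_spapr_request_completed().
+             */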
+ if (rc == H_SUCCESS) {
+                crq->valid = 0;
+ } else {
+ local_crq.valid = SPAPR_VTPM_MSG_RESULT;
+ local_crq.msg = SPAPR_VTPM_VTPM_ERROR;
+ local_crq.len = cpu_to_be16(0);
+ local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED);
+ spapr_tpm_send_crq(dev, &local_crq);
+ }
+ break;
+
+ case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE:
+ trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size);
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE |
+ SPAPR_VTPM_MSG_RESULT;
+ local_crq.len = cpu_to_be16(s->be_buffer_size);
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_GET_VERSION:
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT;
+ local_crq.len = cpu_to_be16(0);
+ switch (s->be_tpm_version) {
+ case TPM_VERSION_1_2:
+ local_crq.data = cpu_to_be32(1);
+ break;
+ case TPM_VERSION_2_0:
+ local_crq.data = cpu_to_be32(2);
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data));
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_PREPARE_TO_SUSPEND:
+ trace_tpm_spapr_do_crq_prepare_to_suspend();
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND |
+ SPAPR_VTPM_MSG_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ default:
+            trace_tpm_spapr_do_crq_unknown_msg_type(msg);
+ }
+ break;
+ default:
+ trace_tpm_spapr_do_crq_unknown_crq(valid, msg);
+    }
+
+ return H_SUCCESS;
+}
+
+static void tpm_spapr_request_completed(TPMIf *ti, int ret)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+ TpmCrq *crq = &s->crq;
+ uint32_t len;
+ int rc;
+
+ s->state = SPAPR_VTPM_STATE_COMPLETION;
+
+    /* at most be_buffer_size bytes can be transported */
+ len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size);
+
+ if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+ trace_tpm_spapr_caught_response(len);
+ /* defer delivery of response until .post_load */
+ s->numbytes = len;
+ return;
+ }
+
+ rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data),
+ s->buffer, len);
+
+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, len, "From TPM");
+ }
+
+ crq->valid = SPAPR_VTPM_MSG_RESULT;
+ if (rc == H_SUCCESS) {
+ crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT;
+ crq->len = cpu_to_be16(len);
+ } else {
+ error_report("%s: DMA write failure", __func__);
+ crq->msg = SPAPR_VTPM_VTPM_ERROR;
+ crq->len = cpu_to_be16(0);
+ crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED);
+ }
+
+ rc = spapr_tpm_send_crq(&s->vdev, crq);
+ if (rc) {
+ error_report("%s: Error sending response", __func__);
+ }
+}
+
+static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize)
+{
+ return tpm_backend_startup_tpm(s->be_driver, buffersize);
+}
+
+static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+ switch (s->be_tpm_version) {
+ case TPM_VERSION_1_2:
+ return "IBM,vtpm";
+ case TPM_VERSION_2_0:
+ return "IBM,vtpm20";
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tpm_spapr_reset(SpaprVioDevice *dev)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+ s->state = SPAPR_VTPM_STATE_NONE;
+ s->numbytes = 0;
+
+ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver);
+
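+    /* advertised to the guest via SPAPR_VTPM_GET_RTCE_BUFFER_SIZE */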
+ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver),
+ TPM_SPAPR_BUFFER_MAX);
+
+ tpm_backend_reset(s->be_driver);
+ tpm_spapr_do_startup_tpm(s, s->be_buffer_size);
+}
+
+static enum TPMVersion tpm_spapr_get_version(TPMIf *ti)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+
+ if (tpm_backend_had_startup_error(s->be_driver)) {
+ return TPM_VERSION_UNSPEC;
+ }
+
+ return tpm_backend_get_tpm_version(s->be_driver);
+}
+
+/* persistent state handling */
+
+static int tpm_spapr_pre_save(void *opaque)
+{
+ SpaprTpmState *s = opaque;
+
+ tpm_backend_finish_sync(s->be_driver);
+    /*
+     * We cannot deliver the response to the VM here: a DMA write would
+     * modify guest memory while migration is completing. The response
+     * is delivered from tpm_spapr_post_load() on the destination.
+     */
+
+ return 0;
+}
+
+static int tpm_spapr_post_load(void *opaque, int version_id)
+{
+ SpaprTpmState *s = opaque;
+
+ if (s->numbytes) {
+ trace_tpm_spapr_post_load();
+ /* deliver the results to the VM via DMA */
+ tpm_spapr_request_completed(TPM_IF(s), 0);
+ s->numbytes = 0;
+ }
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_spapr_vtpm = {
+ .name = "tpm-spapr",
+ .pre_save = tpm_spapr_pre_save,
+ .post_load = tpm_spapr_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_SPAPR_VIO(vdev, SpaprTpmState),
+
+ VMSTATE_UINT8(state, SpaprTpmState),
+ VMSTATE_UINT32(numbytes, SpaprTpmState),
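+        /* a response caught before migration travels in the buffer */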
+ VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes),
+ /* remember DMA address */
+ VMSTATE_UINT32(crq.data, SpaprTpmState),
+ VMSTATE_END_OF_LIST(),
+ }
+};
+
+static Property tpm_spapr_properties[] = {
+ DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev),
+ DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
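+    /*
+     * tpm_find() resolves the TPM interface in the composition tree;
+     * it returns NULL when the lookup is ambiguous, i.e. when another
+     * TPM device exists besides the one being realized here.
+     */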
+ if (!tpm_find()) {
+ error_setg(errp, "at most one TPM device is permitted");
+ return;
+ }
+
+ dev->crq.SendFunc = tpm_spapr_do_crq;
+
+ if (!s->be_driver) {
+ error_setg(errp, "'tpmdev' property is required");
+ return;
+ }
+ s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX);
+}
+
+static void tpm_spapr_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass);
+ TPMIfClass *tc = TPM_IF_CLASS(klass);
+
+ k->realize = tpm_spapr_realizefn;
+ k->reset = tpm_spapr_reset;
+ k->dt_name = "vtpm";
+ k->dt_type = "IBM,vtpm";
+ k->get_dt_compatible = tpm_spapr_get_dt_compatible;
+ k->signal_mask = 0x00000001;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ device_class_set_props(dc, tpm_spapr_properties);
+ k->rtce_window_size = 0x10000000;
+ dc->vmsd = &vmstate_spapr_vtpm;
+
+ tc->model = TPM_MODEL_TPM_SPAPR;
+ tc->get_version = tpm_spapr_get_version;
+ tc->request_completed = tpm_spapr_request_completed;
+}
+
+static const TypeInfo tpm_spapr_info = {
+ .name = TYPE_TPM_SPAPR,
+ .parent = TYPE_VIO_SPAPR_DEVICE,
+ .instance_size = sizeof(SpaprTpmState),
+ .class_init = tpm_spapr_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_TPM_IF },
+ { }
+ }
+};
+
+static void tpm_spapr_register_types(void)
+{
+ type_register_static(&tpm_spapr_info);
+}
+
+type_init(tpm_spapr_register_types)
diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 5362df2..31facb8 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -107,30 +107,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr)
return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7);
}
-static void tpm_tis_show_buffer(const unsigned char *buffer,
- size_t buffer_size, const char *string)
-{
- size_t len, i;
- char *line_buffer, *p;
-
- len = MIN(tpm_cmd_get_size(buffer), buffer_size);
-
- /*
- * allocate enough room for 3 chars per buffer entry plus a
- * newline after every 16 chars and a final null terminator.
- */
- line_buffer = g_malloc(len * 3 + (len / 16) + 1);
-
- for (i = 0, p = line_buffer; i < len; i++) {
- if (i && !(i % 16)) {
- p += sprintf(p, "\n");
- }
- p += sprintf(p, "%.2X ", buffer[i]);
- }
- trace_tpm_tis_show_buffer(string, len, line_buffer);
-
- g_free(line_buffer);
-}
/*
* Set the given flags in the STS register by clearing the register but
@@ -156,8 +132,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags)
*/
static void tpm_tis_tpm_send(TPMState *s, uint8_t locty)
{
- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
}
/*
@@ -325,8 +301,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret)
s->loc[locty].state = TPM_TIS_STATE_COMPLETION;
s->rw_offset = 0;
- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
}
if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) {
diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c
index 62b091f..c0a0f3d 100644
--- a/hw/tpm/tpm_util.c
+++ b/hw/tpm/tpm_util.c
@@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb)
tsb->buffer = NULL;
tsb->size = 0;
}
+
+void tpm_util_show_buffer(const unsigned char *buffer,
+ size_t buffer_size, const char *string)
+{
+ size_t len, i;
+ char *line_buffer, *p;
+
+ len = MIN(tpm_cmd_get_size(buffer), buffer_size);
+
+ /*
+ * allocate enough room for 3 chars per buffer entry plus a
+ * newline after every 16 chars and a final null terminator.
+ */
+ line_buffer = g_malloc(len * 3 + (len / 16) + 1);
+
+ for (i = 0, p = line_buffer; i < len; i++) {
+ if (i && !(i % 16)) {
+ p += sprintf(p, "\n");
+ }
+ p += sprintf(p, "%.2X ", buffer[i]);
+ }
+ trace_tpm_util_show_buffer(string, len, line_buffer);
+
+ g_free(line_buffer);
+}
diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h
index f397ac2..7889081 100644
--- a/hw/tpm/tpm_util.h
+++ b/hw/tpm/tpm_util.h
@@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer {
void tpm_sized_buffer_reset(TPMSizedBuffer *tsb);
+void tpm_util_show_buffer(const unsigned char *buffer,
+ size_t buffer_size, const char *string);
+
#endif /* TPM_TPM_UTIL_H */
diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events
index 89804bc..439e514 100644
--- a/hw/tpm/trace-events
+++ b/hw/tpm/trace-events
@@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u,
tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu"
tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu"
tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu"
+tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
# tpm_emulator.c
tpm_emulator_set_locality(uint8_t locty) "setting locality to %d"
@@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) ""
tpm_emulator_inst_init(void) ""
# tpm_tis.c
-tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s"
tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x"
tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d"
tpm_tis_abort(uint8_t locty) "New active locality is %d"
@@ -55,3 +55,17 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u"
# tpm_ppi.c
tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu"
+
+# tpm_spapr.c
+tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
+tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x"
+tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT"
+tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT"
+tpm_spapr_do_crq_tpm_command(void) "got TPM command payload"
+tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu"
+tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u"
+tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend"
+tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x"
+tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..."
+tpm_spapr_post_load(void) "Delivering TPM response after resume"
+tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes"