diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-02-03 09:52:42 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-02-03 09:52:43 +0000 |
commit | 035b21977ce1791a630c5cbf46e482e54552e05b (patch) | |
tree | c7ad2235c6fc70fa1fb88e613f4134f0fec8f5cb /hw | |
parent | 28db64fce555a03b4ca256d5b6f4290abdfbd9e8 (diff) | |
parent | 63d57c8f91d0d0e62fc4d91db6340a662b36a3c0 (diff) | |
download | qemu-035b21977ce1791a630c5cbf46e482e54552e05b.zip qemu-035b21977ce1791a630c5cbf46e482e54552e05b.tar.gz qemu-035b21977ce1791a630c5cbf46e482e54552e05b.tar.bz2 |
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.0-20200203' into staging
ppc patch queue 2020-02093
This pull request supersedes ppc-for-5.0-20200131. The only changes
are one extra patch to suppress some irritating warnings during tests
under TCG, and an extra Tested-by in one of the other patches.
Here's the next batch of patches for ppc and associated machine types.
Highlights includes:
* Remove the deprecated "prep" machine type and its OpenHackware
firmware
* Add TCG emulation of the msgsndp etc. supervisor privileged
doorbell instructions
* Allow "pnv" machine type to run Hostboot style firmwares
* Add a virtual TPM device for spapr machines
* Implement devices for POWER8 PHB3 and POWER9 PHB4 host bridges for
the pnv machine type
* Use faster Spectre mitigation by default for POWER9 DD2.3 machines
* Introduce Firmware Assisted NMI dump facility for spapr machines
* Fix a performance regression with load/store multiple instructions
in TCG
as well as some other assorted cleanups and fixes.
# gpg: Signature made Mon 03 Feb 2020 03:30:24 GMT
# gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392
* remotes/dgibson/tags/ppc-for-5.0-20200203: (35 commits)
tests: Silence various warnings with pseries
target/ppc: Use probe_write for DCBZ
target/ppc: Remove redundant mask in DCBZ
target/ppc: Use probe_access for LMW, STMW
target/ppc: Use probe_access for LSW, STSW
ppc: spapr: Activate the FWNMI functionality
migration: Include migration support for machine check handling
ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls
target/ppc: Build rtas error log upon an MCE
target/ppc: Handle NMI guest exit
ppc: spapr: Introduce FWNMI capability
Wrapper function to wait on condition for the main loop mutex
target/ppc/cpu.h: Put macro parameter in parentheses
spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine
ppc/pnv: change the PowerNV machine devices to be non user creatable
ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge
ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge
docs/specs/tpm: reST-ify TPM documentation
hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES config
tpm_spapr: Support suspend and resume
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/intc/xics.c | 14 | ||||
-rw-r--r-- | hw/pci-host/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb3.c | 1197 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb3_msi.c | 349 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb3_pbcq.c | 358 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb4.c | 1439 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb4_pec.c | 595 | ||||
-rw-r--r-- | hw/ppc/Kconfig | 2 | ||||
-rw-r--r-- | hw/ppc/pnv.c | 204 | ||||
-rw-r--r-- | hw/ppc/pnv_core.c | 33 | ||||
-rw-r--r-- | hw/ppc/pnv_homer.c | 1 | ||||
-rw-r--r-- | hw/ppc/pnv_lpc.c | 6 | ||||
-rw-r--r-- | hw/ppc/pnv_occ.c | 1 | ||||
-rw-r--r-- | hw/ppc/pnv_pnor.c | 6 | ||||
-rw-r--r-- | hw/ppc/ppc.c | 18 | ||||
-rw-r--r-- | hw/ppc/prep.c | 384 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 63 | ||||
-rw-r--r-- | hw/ppc/spapr_caps.c | 49 | ||||
-rw-r--r-- | hw/ppc/spapr_events.c | 269 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 20 | ||||
-rw-r--r-- | hw/ppc/spapr_rtas.c | 87 | ||||
-rw-r--r-- | hw/ppc/spapr_vio.c | 11 | ||||
-rw-r--r-- | hw/ppc/virtex_ml507.c | 7 | ||||
-rw-r--r-- | hw/tpm/Kconfig | 6 | ||||
-rw-r--r-- | hw/tpm/Makefile.objs | 1 | ||||
-rw-r--r-- | hw/tpm/tpm_spapr.c | 429 | ||||
-rw-r--r-- | hw/tpm/tpm_tis.c | 32 | ||||
-rw-r--r-- | hw/tpm/tpm_util.c | 25 | ||||
-rw-r--r-- | hw/tpm/tpm_util.h | 3 | ||||
-rw-r--r-- | hw/tpm/trace-events | 16 |
30 files changed, 5163 insertions, 464 deletions
diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 785b607..c5d507e 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -217,7 +217,7 @@ void icp_eoi(ICPState *icp, uint32_t xirr) } } -static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) +void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { ICPState *icp = xics_icp_get(ics->xics, server); @@ -512,8 +512,14 @@ void ics_write_xive(ICSState *ics, int srcno, int server, static void ics_reject(ICSState *ics, uint32_t nr) { + ICSStateClass *isc = ICS_GET_CLASS(ics); ICSIRQState *irq = ics->irqs + nr - ics->offset; + if (isc->reject) { + isc->reject(ics, nr); + return; + } + trace_xics_ics_reject(nr, nr - ics->offset); if (irq->flags & XICS_FLAGS_IRQ_MSI) { irq->status |= XICS_STATUS_REJECTED; @@ -524,8 +530,14 @@ static void ics_reject(ICSState *ics, uint32_t nr) void ics_resend(ICSState *ics) { + ICSStateClass *isc = ICS_GET_CLASS(ics); int i; + if (isc->resend) { + isc->resend(ics); + return; + } + for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs index 9c466fa..8c87e84 100644 --- a/hw/pci-host/Makefile.objs +++ b/hw/pci-host/Makefile.objs @@ -20,3 +20,5 @@ common-obj-$(CONFIG_PCI_EXPRESS_GENERIC_BRIDGE) += gpex.o common-obj-$(CONFIG_PCI_EXPRESS_XILINX) += xilinx-pcie.o common-obj-$(CONFIG_PCI_EXPRESS_DESIGNWARE) += designware.o +obj-$(CONFIG_POWERNV) += pnv_phb4.o pnv_phb4_pec.o +obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_msi.o pnv_phb3_pbcq.o diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c new file mode 100644 index 0000000..74618fa --- /dev/null +++ b/hw/pci-host/pnv_phb3.c @@ -0,0 +1,1197 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/visitor.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/pnv.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" + +#define phb3_error(phb, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n", \ + (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) + +static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3]; + uint8_t bus, devfn; + + if (!(addr >> 63)) { + return NULL; + } + bus = (addr >> 52) & 0xff; + devfn = (addr >> 44) & 0xff; + + return pci_find_device(pci->bus, bus, devfn); +} + +/* + * The CONFIG_DATA register expects little endian accesses, but as the + * region is big endian, we have to swap the value. + */ +static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off, + unsigned size, uint64_t val) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + + pdev = pnv_phb3_find_cfg_dev(phb); + if (!pdev) { + return; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return; + } + switch (size) { + case 1: + break; + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + default: + g_assert_not_reached(); + } + pci_host_config_write_common(pdev, cfg_addr, limit, val, size); +} + +static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off, + unsigned size) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + uint64_t val; + + pdev = pnv_phb3_find_cfg_dev(phb); + if (!pdev) { + return ~0ull; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return ~0ull; + } + val = pci_host_config_read_common(pdev, cfg_addr, limit, size); + switch (size) { + case 1: + return val; + case 2: + return bswap16(val); + case 4: + return bswap32(val); + default: + g_assert_not_reached(); + } +} + +static void pnv_phb3_check_m32(PnvPHB3 *phb) +{ + uint64_t base, start, size; + MemoryRegion *parent; + PnvPBCQState *pbcq = &phb->pbcq; + + if (memory_region_is_mapped(&phb->mr_m32)) { + memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32); + } + + if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) { + return; + } + + /* Grab geometry from registers */ + base = phb->regs[PHB_M32_BASE_ADDR >> 3]; + start = phb->regs[PHB_M32_START_ADDR >> 3]; + size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1; + + /* Check if it matches an enabled MMIO region in the PBCQ */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + base >= pbcq->mmio0_base && + (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) { + parent = &pbcq->mmbar0; + base -= pbcq->mmio0_base; + } else if (memory_region_is_mapped(&pbcq->mmbar1) && + base >= pbcq->mmio1_base && + (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) { + parent = &pbcq->mmbar1; + base -= pbcq->mmio1_base; + } else { + return; + } + + /* Create alias */ + memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32", + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_m32); +} + +static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index) +{ + uint64_t base, start, size, m64; + MemoryRegion *parent; + PnvPBCQState *pbcq = &phb->pbcq; + + if (memory_region_is_mapped(&phb->mr_m64[index])) { + /* Should we destroy it in RCU friendly way... ? */ + memory_region_del_subregion(phb->mr_m64[index].container, + &phb->mr_m64[index]); + } + + /* Get table entry */ + m64 = phb->ioda_M64BT[index]; + + if (!(m64 & IODA2_M64BT_ENABLE)) { + return; + } + + /* Grab geometry from registers */ + base = GETFIELD(IODA2_M64BT_BASE, m64) << 20; + if (m64 & IODA2_M64BT_SINGLE_PE) { + base &= ~0x1ffffffull; + } + size = GETFIELD(IODA2_M64BT_MASK, m64) << 20; + size |= 0xfffc000000000000ull; + size = ~size + 1; + start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]); + + /* Check if it matches an enabled MMIO region in the PBCQ */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + base >= pbcq->mmio0_base && + (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) { + parent = &pbcq->mmbar0; + base -= pbcq->mmio0_base; + } else if (memory_region_is_mapped(&pbcq->mmbar1) && + base >= pbcq->mmio1_base && + (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) { + parent = &pbcq->mmbar1; + base -= pbcq->mmio1_base; + } else { + return; + } + + /* Create alias */ + memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64", + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_m64[index]); +} + +static void pnv_phb3_check_all_m64s(PnvPHB3 *phb) +{ + uint64_t i; + + for (i = 0; i < PNV_PHB3_NUM_M64; i++) { + pnv_phb3_check_m64(phb, i); + } +} + +static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val) +{ + uint8_t server, prio; + + phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER | + IODA2_LXIVT_PRIORITY | + IODA2_LXIVT_NODE_ID); + server = GETFIELD(IODA2_LXIVT_SERVER, val); + prio = GETFIELD(IODA2_LXIVT_PRIORITY, val); + + /* + * The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + ics_write_xive(&phb->lsis, idx, server, prio, prio); +} + +static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb, + unsigned *out_table, unsigned *out_idx) +{ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg); + unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg); + unsigned int mask; + uint64_t *tptr = NULL; + + switch (table) { + case IODA2_TBL_LIST: + tptr = phb->ioda_LIST; + mask = 7; + break; + case IODA2_TBL_LXIVT: + tptr = phb->ioda_LXIVT; + mask = 7; + break; + case IODA2_TBL_IVC_CAM: + case IODA2_TBL_RBA: + mask = 31; + break; + case IODA2_TBL_RCAM: + mask = 63; + break; + case IODA2_TBL_MRT: + mask = 7; + break; + case IODA2_TBL_PESTA: + case IODA2_TBL_PESTB: + mask = 255; + break; + case IODA2_TBL_TVT: + tptr = phb->ioda_TVT; + mask = 511; + break; + case IODA2_TBL_TCAM: + case IODA2_TBL_TDR: + mask = 63; + break; + case IODA2_TBL_M64BT: + tptr = phb->ioda_M64BT; + mask = 15; + break; + case IODA2_TBL_M32DT: + tptr = phb->ioda_MDT; + mask = 255; + break; + case IODA2_TBL_PEEV: + tptr = phb->ioda_PEEV; + mask = 3; + break; + default: + phb3_error(phb, "invalid IODA table %d", table); + return NULL; + } + index &= mask; + if (out_idx) { + *out_idx = index; + } + if (out_table) { + *out_table = table; + } + if (tptr) { + tptr += index; + } + if (adreg & PHB_IODA_AD_AUTOINC) { + index = (index + 1) & mask; + adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index); + } + phb->regs[PHB_IODA_ADDR >> 3] = adreg; + return tptr; +} + +static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb) +{ + unsigned table; + uint64_t *tptr; + + tptr = pnv_phb3_ioda_access(phb, &table, NULL); + if (!tptr) { + /* Return 0 on unsupported tables, not ff's */ + return 0; + } + return *tptr; +} + +static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb3_ioda_access(phb, &table, &idx); + if (!tptr) { + return; + } + + /* Handle side effects */ + switch (table) { + case IODA2_TBL_LXIVT: + pnv_phb3_lxivt_write(phb, idx, val); + break; + case IODA2_TBL_M64BT: + *tptr = val; + pnv_phb3_check_m64(phb, idx); + break; + default: + *tptr = val; + } +} + +/* + * This is called whenever the PHB LSI, MSI source ID register or + * the PBCQ irq filters are written. + */ +void pnv_phb3_remap_irqs(PnvPHB3 *phb) +{ + ICSState *ics = &phb->lsis; + uint32_t local, global, count, mask, comp; + uint64_t baren; + PnvPBCQState *pbcq = &phb->pbcq; + + /* + * First check if we are enabled. Unlike real HW we don't separate + * TX and RX so we enable if both are set + */ + baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN]; + if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) || + !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) { + ics->offset = 0; + return; + } + + /* Grab local LSI source ID */ + local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3; + + /* Grab global one and compare */ + global = GETFIELD(PBCQ_NEST_LSI_SRC, + pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3; + if (global != local) { + /* + * This happens during initialization, let's come back when we + * are properly configured + */ + ics->offset = 0; + return; + } + + /* Get the base on the powerbus */ + comp = GETFIELD(PBCQ_NEST_IRSN_COMP, + pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]); + mask = GETFIELD(PBCQ_NEST_IRSN_COMP, + pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]); + count = ((~mask) + 1) & 0x7ffff; + phb->total_irq = count; + + /* Sanity checks */ + if ((global + PNV_PHB3_NUM_LSI) > count) { + phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global, + count); + } + + if (count > 2048) { + phb3_error(phb, "More interrupts than supported: %d", count); + } + + if ((comp & mask) != comp) { + phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x", + comp, mask); + comp &= mask; + } + /* Setup LSI offset */ + ics->offset = comp + global; + + /* Setup MSI offset */ + pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI); +} + +static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val) +{ + /* Sanitize content */ + val &= PHB_LSI_SRC_ID; + phb->regs[PHB_LSI_SOURCE_ID >> 3] = val; + pnv_phb3_remap_irqs(phb); +} + +static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val) +{ + PnvPhb3DMASpace *ds; + + /* Always invalidate all for now ... */ + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + ds->pe_num = PHB_INVALID_PE; + } +} + + +static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds) +{ + uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3]; + + if (cfg & PHB_PHB3C_32BIT_MSI_EN) { + if (!memory_region_is_mapped(&ds->msi32_mr)) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + 0xffff0000, &ds->msi32_mr); + } + } else { + if (memory_region_is_mapped(&ds->msi32_mr)) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi32_mr); + } + } + + if (cfg & PHB_PHB3C_64BIT_MSI_EN) { + if (!memory_region_is_mapped(&ds->msi64_mr)) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + (1ull << 60), &ds->msi64_mr); + } + } else { + if (memory_region_is_mapped(&ds->msi64_mr)) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi64_mr); + } + } +} + +static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb) +{ + PnvPhb3DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + pnv_phb3_update_msi_regions(ds); + } +} + +void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size) +{ + PnvPHB3 *phb = opaque; + bool changed; + + /* Special case configuration data */ + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + pnv_phb3_config_write(phb, off & 0x3, size, val); + return; + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return; + } + + /* Handle masking & filtering */ + switch (off) { + case PHB_M64_UPPER_BITS: + val &= 0xfffc000000000000ull; + break; + case PHB_Q_DMA_R: + /* + * This is enough logic to make SW happy but we aren't actually + * quiescing the DMAs + */ + if (val & PHB_Q_DMA_R_AUTORESET) { + val = 0; + } else { + val &= PHB_Q_DMA_R_QUIESCE_DMA; + } + break; + /* LEM stuff */ + case PHB_LEM_FIR_AND_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val; + return; + case PHB_LEM_FIR_OR_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val; + return; + case PHB_LEM_ERROR_AND_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val; + return; + case PHB_LEM_ERROR_OR_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val; + return; + case PHB_LEM_WOF: + val = 0; + break; + } + + /* Record whether it changed */ + changed = phb->regs[off >> 3] != val; + + /* Store in register cache first */ + phb->regs[off >> 3] = val; + + /* Handle side effects */ + switch (off) { + case PHB_PHB3_CONFIG: + if (changed) { + pnv_phb3_update_all_msi_regions(phb); + } + /* fall through */ + case PHB_M32_BASE_ADDR: + case PHB_M32_BASE_MASK: + case PHB_M32_START_ADDR: + if (changed) { + pnv_phb3_check_m32(phb); + } + break; + case PHB_M64_UPPER_BITS: + if (changed) { + pnv_phb3_check_all_m64s(phb); + } + break; + case PHB_LSI_SOURCE_ID: + if (changed) { + pnv_phb3_lsi_src_id_write(phb, val); + } + break; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + pnv_phb3_ioda_write(phb, val); + break; + + /* RTC invalidation */ + case PHB_RTC_INVALIDATE: + pnv_phb3_rtc_invalidate(phb, val); + break; + + /* FFI request */ + case PHB_FFI_REQUEST: + pnv_phb3_msi_ffi(&phb->msis, val); + break; + + /* Silent simple writes */ + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_RBA_BAR: + case PHB_IVT_BAR: + case PHB_FFI_LOCK: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } +} + +uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size) +{ + PnvPHB3 *phb = opaque; + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t val; + + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + return pnv_phb3_config_read(phb, off & 0x3, size); + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return ~0ull; + } + + /* Default read from cache */ + val = phb->regs[off >> 3]; + + switch (off) { + /* Simulate venice DD2.0 */ + case PHB_VERSION: + return 0x000000a300000005ull; + case PHB_PCIE_SYSTEM_CONFIG: + return 0x441100fc30000000; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + return pnv_phb3_ioda_read(phb); + + /* Link training always appears trained */ + case PHB_PCIE_DLP_TRAIN_CTL: + if (!pci_find_device(pci->bus, 1, 0)) { + return 0; + } + return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT; + + /* FFI Lock */ + case PHB_FFI_LOCK: + /* Set lock and return previous value */ + phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE; + return val; + + /* DMA read sync: make it look like it's complete */ + case PHB_DMARD_SYNC: + return PHB_DMARD_SYNC_COMPLETE; + + /* Silent simple reads */ + case PHB_PHB3_CONFIG: + case PHB_M32_BASE_ADDR: + case PHB_M32_BASE_MASK: + case PHB_M32_START_ADDR: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_RTC_INVALIDATE: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_RBA_BAR: + case PHB_IVT_BAR: + case PHB_M64_UPPER_BITS: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } + return val; +} + +static const MemoryRegionOps pnv_phb3_reg_ops = { + .read = pnv_phb3_reg_read, + .write = pnv_phb3_reg_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* Check that out properly ... */ + return irq_num & 3; +} + +static void pnv_phb3_set_irq(void *opaque, int irq_num, int level) +{ + PnvPHB3 *phb = opaque; + + /* LSI only ... */ + if (irq_num > 3) { + phb3_error(phb, "Unknown IRQ to set %d", irq_num); + } + qemu_set_irq(phb->qirqs[irq_num], level); +} + +static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds) +{ + uint64_t rtt, addr; + uint16_t rte; + int bus_num; + + /* Already resolved ? */ + if (ds->pe_num != PHB_INVALID_PE) { + return true; + } + + /* We need to lookup the RTT */ + rtt = ds->phb->regs[PHB_RTT_BAR >> 3]; + if (!(rtt & PHB_RTT_BAR_ENABLE)) { + phb3_error(ds->phb, "DMA with RTT BAR disabled !"); + /* Set error bits ? fence ? ... */ + return false; + } + + /* Read RTE */ + bus_num = pci_bus_num(ds->bus); + addr = rtt & PHB_RTT_BASE_ADDRESS_MASK; + addr += 2 * ((bus_num << 8) | ds->devfn); + if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) { + phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr); + /* Set error bits ? fence ? ... */ + return false; + } + rte = be16_to_cpu(rte); + + /* Fail upon reading of invalid PE# */ + if (rte >= PNV_PHB3_NUM_PE) { + phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte); + /* Set error bits ? fence ? ... */ + return false; + } + ds->pe_num = rte; + return true; +} + +static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, + bool is_write, uint64_t tve, + IOMMUTLBEntry *tlb) +{ + uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve); + int32_t lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve); + uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve); + uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve); + PnvPHB3 *phb = ds->phb; + + /* Invalid levels */ + if (lev > 4) { + phb3_error(phb, "Invalid #levels in TVE %d", lev); + return; + } + + /* IO Page Size of 0 means untranslated, else use TCEs */ + if (tps == 0) { + /* + * We only support non-translate in top window. + * + * TODO: Venice/Murano support it on bottom window above 4G and + * Naples suports it on everything + */ + if (!(tve & PPC_BIT(51))) { + phb3_error(phb, "xlate for invalid non-translate TVE"); + return; + } + /* TODO: Handle boundaries */ + + /* Use 4k pages like q35 ... for now */ + tlb->iova = addr & 0xfffffffffffff000ull; + tlb->translated_addr = addr & 0x0003fffffffff000ull; + tlb->addr_mask = 0xfffull; + tlb->perm = IOMMU_RW; + } else { + uint32_t tce_shift, tbl_shift, sh; + uint64_t base, taddr, tce, tce_mask; + + /* TVE disabled ? */ + if (tts == 0) { + phb3_error(phb, "xlate for invalid translated TVE"); + return; + } + + /* Address bits per bottom level TCE entry */ + tce_shift = tps + 11; + + /* Address bits per table level */ + tbl_shift = tts + 8; + + /* Top level table base address */ + base = tta << 12; + + /* Total shift to first level */ + sh = tbl_shift * lev + tce_shift; + + /* TODO: Multi-level untested */ + while ((lev--) >= 0) { + /* Grab the TCE address */ + taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); + if (dma_memory_read(&address_space_memory, taddr, &tce, + sizeof(tce))) { + phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr); + return; + } + tce = be64_to_cpu(tce); + + /* Check permission for indirect TCE */ + if ((lev >= 0) && !(tce & 3)) { + phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr); + phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + return; + } + sh -= tbl_shift; + base = tce & ~0xfffull; + } + + /* We exit the loop with TCE being the final TCE */ + tce_mask = ~((1ull << tce_shift) - 1); + tlb->iova = addr & tce_mask; + tlb->translated_addr = tce & tce_mask; + tlb->addr_mask = ~tce_mask; + tlb->perm = tce & 3; + if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { + phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr); + phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + } + } +} + +static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr); + int tve_sel; + uint64_t tve, cfg; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + PnvPHB3 *phb = ds->phb; + + /* Resolve PE# */ + if (!pnv_phb3_resolve_pe(ds)) { + phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return ret; + } + + /* Check top bits */ + switch (addr >> 60) { + case 00: + /* DMA or 32-bit MSI ? */ + cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3]; + if ((cfg & PHB_PHB3C_32BIT_MSI_EN) && + ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) { + phb3_error(phb, "xlate on 32-bit MSI region"); + return ret; + } + /* Choose TVE XXX Use PHB3 Control Register */ + tve_sel = (addr >> 59) & 1; + tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel]; + pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret); + break; + case 01: + phb3_error(phb, "xlate on 64-bit MSI region"); + break; + default: + phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr); + } + return ret; +} + +#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region" +#define PNV_PHB3_IOMMU_MEMORY_REGION(obj) \ + OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION) + +static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = pnv_phb3_translate_iommu; +} + +static const TypeInfo pnv_phb3_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION, + .class_init = pnv_phb3_iommu_memory_region_class_init, +}; + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + */ +static void pnv_phb3_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + PnvPhb3DMASpace *ds = opaque; + + /* Resolve PE# */ + if (!pnv_phb3_resolve_pe(ds)) { + phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return; + } + + pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num); +} + +/* There is no .read as the read result is undefined by PCI spec */ +static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPhb3DMASpace *ds = opaque; + + phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr); + return -1; +} + +static const MemoryRegionOps pnv_phb3_msi_ops = { + .read = pnv_phb3_msi_read, + .write = pnv_phb3_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + PnvPHB3 *phb = opaque; + PnvPhb3DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + if (ds->bus == bus && ds->devfn == devfn) { + break; + } + } + + if (ds == NULL) { + ds = g_malloc0(sizeof(PnvPhb3DMASpace)); + ds->bus = bus; + ds->devfn = devfn; + ds->pe_num = PHB_INVALID_PE; + ds->phb = phb; + memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr), + TYPE_PNV_PHB3_IOMMU_MEMORY_REGION, + OBJECT(phb), "phb3_iommu", UINT64_MAX); + address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr), + "phb3_iommu"); + memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops, + ds, "msi32", 0x10000); + memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops, + ds, "msi64", 0x100000); + pnv_phb3_update_msi_regions(ds); + + QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list); + } + return &ds->dma_as; +} + +static void pnv_phb3_instance_init(Object *obj) +{ + PnvPHB3 *phb = PNV_PHB3(obj); + + QLIST_INIT(&phb->dma_spaces); + + /* LSI sources */ + object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis), + TYPE_ICS, &error_abort, NULL); + + /* Default init ... will be fixed by HW inits */ + phb->lsis.offset = 0; + + /* MSI sources */ + object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis), + TYPE_PHB3_MSI, &error_abort, NULL); + + /* Power Bus Common Queue */ + object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq), + TYPE_PNV_PBCQ, &error_abort, NULL); + + /* Root Port */ + object_initialize_child(obj, "root", &phb->root, sizeof(phb->root), + TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL); + qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); + qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); +} + +static void pnv_phb3_realize(DeviceState *dev, Error **errp) +{ + PnvPHB3 *phb = PNV_PHB3(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + Error *local_err = NULL; + int i; + + if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) { + error_setg(errp, "invalid PHB index: %d", phb->phb_id); + return; + } + + /* LSI sources */ + object_property_set_link(OBJECT(&phb->lsis), OBJECT(pnv), "xics", + &error_abort); + object_property_set_int(OBJECT(&phb->lsis), PNV_PHB3_NUM_LSI, "nr-irqs", + &error_abort); + object_property_set_bool(OBJECT(&phb->lsis), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + for (i = 0; i < phb->lsis.nr_irqs; i++) { + ics_set_irq_type(&phb->lsis, i, true); + } + + phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs); + + /* MSI sources */ + object_property_set_link(OBJECT(&phb->msis), OBJECT(phb), "phb", + &error_abort); + object_property_set_link(OBJECT(&phb->msis), OBJECT(pnv), "xics", + &error_abort); + object_property_set_int(OBJECT(&phb->msis), PHB3_MAX_MSI, "nr-irqs", + &error_abort); + object_property_set_bool(OBJECT(&phb->msis), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Power Bus Common Queue */ + object_property_set_link(OBJECT(&phb->pbcq), OBJECT(phb), "phb", + &error_abort); + object_property_set_bool(OBJECT(&phb->pbcq), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Controller Registers */ + memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb, + "phb3-regs", 0x1000); + + /* + * PHB3 doesn't support IO space. However, qemu gets very upset if + * we don't have an IO region to anchor IO BARs onto so we just + * initialize one which we never hook up to anything + */ + memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000); + memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio", + PCI_MMIO_TOTAL_SIZE); + + pci->bus = pci_register_root_bus(dev, "root-bus", + pnv_phb3_set_irq, pnv_phb3_map_irq, phb, + &phb->pci_mmio, &phb->pci_io, + 0, 4, TYPE_PNV_PHB3_ROOT_BUS); + + pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); + + /* Add a single Root port */ + qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); + qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); + qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus)); + qdev_init_nofail(DEVICE(&phb->root)); +} + +void pnv_phb3_update_regions(PnvPHB3 *phb) +{ + PnvPBCQState *pbcq = &phb->pbcq; + + /* Unmap first always */ + if (memory_region_is_mapped(&phb->mr_regs)) { + memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs); + } + + /* Map registers if enabled */ + if (memory_region_is_mapped(&pbcq->phbbar)) { + /* TODO: We should use the PHB BAR 2 register but we don't ... */ + memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs); + } + + /* Check/update m32 */ + if (memory_region_is_mapped(&phb->mr_m32)) { + pnv_phb3_check_m32(phb); + } + pnv_phb3_check_all_m64s(phb); +} + +static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PnvPHB3 *phb = PNV_PHB3(host_bridge); + + snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x", + phb->chip_id, phb->phb_id); + return phb->bus_path; +} + +static Property pnv_phb3_properties[] = { + DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_phb3_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + hc->root_bus_path = pnv_phb3_root_bus_path; + dc->realize = pnv_phb3_realize; + device_class_set_props(dc, pnv_phb3_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->user_creatable = false; +} + +static const TypeInfo pnv_phb3_type_info = { + .name = TYPE_PNV_PHB3, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_size = sizeof(PnvPHB3), + .class_init = pnv_phb3_class_init, + .instance_init = pnv_phb3_instance_init, +}; + +static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + /* + * PHB3 has only a single root complex. Enforce the limit on the + * parent bus + */ + k->max_dev = 1; +} + +static const TypeInfo pnv_phb3_root_bus_info = { + .name = TYPE_PNV_PHB3_ROOT_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pnv_phb3_root_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); + + dc->desc = "IBM PHB3 PCIE Root Port"; + + device_class_set_parent_realize(dc, pnv_phb3_root_port_realize, + &rpc->parent_realize); + dc->user_creatable = false; + + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = 0x03dc; + k->revision = 0; + + rpc->exp_offset = 0x48; + rpc->aer_offset = 0x100; +} + +static const TypeInfo pnv_phb3_root_port_info = { + .name = TYPE_PNV_PHB3_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(PnvPHB3RootPort), + .class_init = pnv_phb3_root_port_class_init, +}; + +static void pnv_phb3_register_types(void) +{ + type_register_static(&pnv_phb3_root_bus_info); + type_register_static(&pnv_phb3_root_port_info); + type_register_static(&pnv_phb3_type_info); + type_register_static(&pnv_phb3_iommu_memory_region_info); +} + +type_init(pnv_phb3_register_types) diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c new file mode 100644 index 0000000..ecfc1b2 --- /dev/null +++ b/hw/pci-host/pnv_phb3_msi.c @@ -0,0 +1,349 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/ppc/pnv.h" +#include "hw/pci/msi.h" +#include "monitor/monitor.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" + +static uint64_t phb3_msi_ive_addr(PnvPHB3 *phb, int srcno) +{ + uint64_t ivtbar = phb->regs[PHB_IVT_BAR >> 3]; + uint64_t phbctl = phb->regs[PHB_CONTROL >> 3]; + + if (!(ivtbar & PHB_IVT_BAR_ENABLE)) { + qemu_log_mask(LOG_GUEST_ERROR, "Failed access to disable IVT BAR !"); + return 0; + } + + if (srcno >= (ivtbar & PHB_IVT_LENGTH_MASK)) { + qemu_log_mask(LOG_GUEST_ERROR, "MSI out of bounds (%d vs 0x%"PRIx64")", + srcno, (uint64_t) (ivtbar & PHB_IVT_LENGTH_MASK)); + return 0; + } + + ivtbar &= PHB_IVT_BASE_ADDRESS_MASK; + + if (phbctl & PHB_CTRL_IVE_128_BYTES) { + return ivtbar + 128 * srcno; + } else { + return ivtbar + 16 * srcno; + } +} + +static bool phb3_msi_read_ive(PnvPHB3 *phb, int srcno, uint64_t *out_ive) +{ + uint64_t ive_addr, ive; + + ive_addr = phb3_msi_ive_addr(phb, srcno); + if (!ive_addr) { + return false; + } + + if (dma_memory_read(&address_space_memory, ive_addr, &ive, sizeof(ive))) { + qemu_log_mask(LOG_GUEST_ERROR, "Failed to read IVE at 0x%" PRIx64, + ive_addr); + return false; + } + *out_ive = be64_to_cpu(ive); + + return true; +} + +static void phb3_msi_set_p(Phb3MsiState *msi, int srcno, uint8_t gen) +{ + uint64_t ive_addr; + uint8_t p = 0x01 | (gen << 1); + + ive_addr = phb3_msi_ive_addr(msi->phb, srcno); + if (!ive_addr) { + return; + } + + if (dma_memory_write(&address_space_memory, ive_addr + 4, &p, 1)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Failed to write IVE (set P) at 0x%" PRIx64, ive_addr); + } +} + +static void phb3_msi_set_q(Phb3MsiState *msi, int srcno) +{ + uint64_t ive_addr; + uint8_t q = 0x01; + + ive_addr = phb3_msi_ive_addr(msi->phb, srcno); + if (!ive_addr) { + return; + } + + if (dma_memory_write(&address_space_memory, ive_addr + 5, &q, 1)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Failed to write IVE (set Q) at 0x%" PRIx64, ive_addr); + } +} + +static void phb3_msi_try_send(Phb3MsiState *msi, int srcno, bool force) +{ + ICSState *ics = ICS(msi); + uint64_t ive; + uint64_t server, prio, pq, gen; + + if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) { + return; + } + + server = GETFIELD(IODA2_IVT_SERVER, ive); + prio = GETFIELD(IODA2_IVT_PRIORITY, ive); + if (!force) { + pq = GETFIELD(IODA2_IVT_Q, ive) | (GETFIELD(IODA2_IVT_P, ive) << 1); + } else { + pq = 0; + } + gen = GETFIELD(IODA2_IVT_GEN, ive); + + /* + * The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + switch (pq) { + case 0: /* 00 */ + if (prio == 0xff) { + /* Masked, set Q */ + phb3_msi_set_q(msi, srcno); + } else { + /* Enabled, set P and send */ + phb3_msi_set_p(msi, srcno, gen); + icp_irq(ics, server, srcno + ics->offset, prio); + } + break; + case 2: /* 10 */ + /* Already pending, set Q */ + phb3_msi_set_q(msi, srcno); + break; + case 1: /* 01 */ + case 3: /* 11 */ + default: + /* Just drop stuff if Q already set */ + break; + } +} + +static void phb3_msi_set_irq(void *opaque, int srcno, int val) +{ + Phb3MsiState *msi = PHB3_MSI(opaque); + + if (val) { + phb3_msi_try_send(msi, srcno, false); + } +} + + +void pnv_phb3_msi_send(Phb3MsiState *msi, uint64_t addr, uint16_t data, + int32_t dev_pe) +{ + ICSState *ics = ICS(msi); + uint64_t ive; + uint16_t pe; + uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f); + + if (src >= ics->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "MSI %d out of bounds", src); + return; + } + if (dev_pe >= 0) { + if (!phb3_msi_read_ive(msi->phb, src, &ive)) { + return; + } + pe = GETFIELD(IODA2_IVT_PE, ive); + if (pe != dev_pe) { + qemu_log_mask(LOG_GUEST_ERROR, + "MSI %d send by PE#%d but assigned to PE#%d", + src, dev_pe, pe); + return; + } + } + qemu_irq_pulse(msi->qirqs[src]); +} + +void pnv_phb3_msi_ffi(Phb3MsiState *msi, uint64_t val) +{ + /* Emit interrupt */ + pnv_phb3_msi_send(msi, val, 0, -1); + + /* Clear FFI lock */ + msi->phb->regs[PHB_FFI_LOCK >> 3] = 0; +} + +static void phb3_msi_reject(ICSState *ics, uint32_t nr) +{ + Phb3MsiState *msi = PHB3_MSI(ics); + unsigned int srcno = nr - ics->offset; + unsigned int idx = srcno >> 6; + unsigned int bit = 1ull << (srcno & 0x3f); + + assert(srcno < PHB3_MAX_MSI); + + msi->rba[idx] |= bit; + msi->rba_sum |= (1u << idx); +} + +static void phb3_msi_resend(ICSState *ics) +{ + Phb3MsiState *msi = PHB3_MSI(ics); + unsigned int i, j; + + if (msi->rba_sum == 0) { + return; + } + + for (i = 0; i < 32; i++) { + if ((msi->rba_sum & (1u << i)) == 0) { + continue; + } + msi->rba_sum &= ~(1u << i); + for (j = 0; j < 64; j++) { + if ((msi->rba[i] & (1ull << j)) == 0) { + continue; + } + msi->rba[i] &= ~(1u << j); + phb3_msi_try_send(msi, i * 64 + j, true); + } + } +} + +static void phb3_msi_reset(DeviceState *dev) +{ + Phb3MsiState *msi = PHB3_MSI(dev); + ICSStateClass *icsc = ICS_GET_CLASS(dev); + + icsc->parent_reset(dev); + + memset(msi->rba, 0, sizeof(msi->rba)); + msi->rba_sum = 0; +} + +static void phb3_msi_reset_handler(void *dev) +{ + phb3_msi_reset(dev); +} + +void pnv_phb3_msi_update_config(Phb3MsiState *msi, uint32_t base, + uint32_t count) +{ + ICSState *ics = ICS(msi); + + if (count > PHB3_MAX_MSI) { + count = PHB3_MAX_MSI; + } + ics->nr_irqs = count; + ics->offset = base; +} + +static void phb3_msi_realize(DeviceState *dev, Error **errp) +{ + Phb3MsiState *msi = PHB3_MSI(dev); + ICSState *ics = ICS(msi); + ICSStateClass *icsc = ICS_GET_CLASS(ics); + Error *local_err = NULL; + + assert(msi->phb); + + icsc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + msi->qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msi, ics->nr_irqs); + + qemu_register_reset(phb3_msi_reset_handler, dev); +} + +static void phb3_msi_instance_init(Object *obj) +{ + Phb3MsiState *msi = PHB3_MSI(obj); + ICSState *ics = ICS(obj); + + object_property_add_link(obj, "phb", TYPE_PNV_PHB3, + (Object **)&msi->phb, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG, + &error_abort); + + /* Will be overriden later */ + ics->offset = 0; +} + +static void phb3_msi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ICSStateClass *isc = ICS_CLASS(klass); + + device_class_set_parent_realize(dc, phb3_msi_realize, + &isc->parent_realize); + device_class_set_parent_reset(dc, phb3_msi_reset, + &isc->parent_reset); + + isc->reject = phb3_msi_reject; + isc->resend = phb3_msi_resend; +} + +static const TypeInfo phb3_msi_info = { + .name = TYPE_PHB3_MSI, + .parent = TYPE_ICS, + .instance_size = sizeof(Phb3MsiState), + .class_init = phb3_msi_class_init, + .class_size = sizeof(ICSStateClass), + .instance_init = phb3_msi_instance_init, +}; + +static void pnv_phb3_msi_register_types(void) +{ + type_register_static(&phb3_msi_info); +} + +type_init(pnv_phb3_msi_register_types); + +void pnv_phb3_msi_pic_print_info(Phb3MsiState *msi, Monitor *mon) +{ + ICSState *ics = ICS(msi); + int i; + + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); + + for (i = 0; i < ics->nr_irqs; i++) { + uint64_t ive; + + if (!phb3_msi_read_ive(msi->phb, i, &ive)) { + return; + } + + if (GETFIELD(IODA2_IVT_PRIORITY, ive) == 0xff) { + continue; + } + + monitor_printf(mon, " %4x %c%c server=%04x prio=%02x gen=%d\n", + ics->offset + i, + GETFIELD(IODA2_IVT_P, ive) ? 'P' : '-', + GETFIELD(IODA2_IVT_Q, ive) ? 'Q' : '-', + (uint32_t) GETFIELD(IODA2_IVT_SERVER, ive) >> 2, + (uint32_t) GETFIELD(IODA2_IVT_PRIORITY, ive), + (uint32_t) GETFIELD(IODA2_IVT_GEN, ive)); + } +} diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c new file mode 100644 index 0000000..f232228 --- /dev/null +++ b/hw/pci-host/pnv_phb3_pbcq.c @@ -0,0 +1,358 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/log.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" + +#include <libfdt.h> + +#define phb3_pbcq_error(pbcq, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb3_pbcq[%d:%d]: " fmt "\n", \ + (pbcq)->phb->chip_id, (pbcq)->phb->phb_id, ## __VA_ARGS__) + +static uint64_t pnv_pbcq_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + return pbcq->nest_regs[offset]; +} + +static uint64_t pnv_pbcq_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + return pbcq->pci_regs[offset]; +} + +static uint64_t pnv_pbcq_spci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + if (offset == PBCQ_SPCI_ASB_DATA) { + return pnv_phb3_reg_read(pbcq->phb, + pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], 8); + } + return pbcq->spci_regs[offset]; +} + +static void pnv_pbcq_update_map(PnvPBCQState *pbcq) +{ + uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN]; + uint64_t bar, mask, size; + + /* + * NOTE: This will really not work well if those are remapped + * after the PHB has created its sub regions. We could do better + * if we had a way to resize regions but we don't really care + * that much in practice as the stuff below really only happens + * once early during boot + */ + + /* Handle unmaps */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + !(bar_en & PBCQ_NEST_BAR_EN_MMIO0)) { + memory_region_del_subregion(get_system_memory(), &pbcq->mmbar0); + } + if (memory_region_is_mapped(&pbcq->mmbar1) && + !(bar_en & PBCQ_NEST_BAR_EN_MMIO1)) { + memory_region_del_subregion(get_system_memory(), &pbcq->mmbar1); + } + if (memory_region_is_mapped(&pbcq->phbbar) && + !(bar_en & PBCQ_NEST_BAR_EN_PHB)) { + memory_region_del_subregion(get_system_memory(), &pbcq->phbbar); + } + + /* Update PHB */ + pnv_phb3_update_regions(pbcq->phb); + + /* Handle maps */ + if (!memory_region_is_mapped(&pbcq->mmbar0) && + (bar_en & PBCQ_NEST_BAR_EN_MMIO0)) { + bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14; + mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0]; + size = ((~mask) >> 14) + 1; + memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar0); + pbcq->mmio0_base = bar; + pbcq->mmio0_size = size; + } + if (!memory_region_is_mapped(&pbcq->mmbar1) && + (bar_en & PBCQ_NEST_BAR_EN_MMIO1)) { + bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14; + mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1]; + size = ((~mask) >> 14) + 1; + memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar1); + pbcq->mmio1_base = bar; + pbcq->mmio1_size = size; + } + if (!memory_region_is_mapped(&pbcq->phbbar) + && (bar_en & PBCQ_NEST_BAR_EN_PHB)) { + bar = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14; + size = 0x1000; + memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->phbbar); + } + + /* Update PHB */ + pnv_phb3_update_regions(pbcq->phb); +} + +static void pnv_pbcq_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_NEST_MMIO_BAR0: + case PBCQ_NEST_MMIO_BAR1: + case PBCQ_NEST_MMIO_MASK0: + case PBCQ_NEST_MMIO_MASK1: + if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & + (PBCQ_NEST_BAR_EN_MMIO0 | + PBCQ_NEST_BAR_EN_MMIO1)) { + phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported"); + } + pbcq->nest_regs[reg] = val & 0xffffffffc0000000ull; + break; + case PBCQ_NEST_PHB_BAR: + if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & PBCQ_NEST_BAR_EN_PHB) { + phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported"); + } + pbcq->nest_regs[reg] = val & 0xfffffffffc000000ull; + break; + case PBCQ_NEST_BAR_EN: + pbcq->nest_regs[reg] = val & 0xf800000000000000ull; + pnv_pbcq_update_map(pbcq); + pnv_phb3_remap_irqs(pbcq->phb); + break; + case PBCQ_NEST_IRSN_COMPARE: + case PBCQ_NEST_IRSN_MASK: + pbcq->nest_regs[reg] = val & PBCQ_NEST_IRSN_COMP; + pnv_phb3_remap_irqs(pbcq->phb); + break; + case PBCQ_NEST_LSI_SRC_ID: + pbcq->nest_regs[reg] = val & PBCQ_NEST_LSI_SRC; + pnv_phb3_remap_irqs(pbcq->phb); + break; + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_PCI_BAR2: + pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull; + pnv_pbcq_update_map(pbcq); + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static void pnv_pbcq_spci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_SPCI_ASB_ADDR: + pbcq->spci_regs[reg] = val & 0xfff; + break; + case PBCQ_SPCI_ASB_STATUS: + pbcq->spci_regs[reg] &= ~val; + break; + case PBCQ_SPCI_ASB_DATA: + pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], + val, 8); + break; + case PBCQ_SPCI_AIB_CAPP_EN: + case PBCQ_SPCI_CAPP_SEC_TMR: + break; + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pbcq_nest_xscom_ops = { + .read = pnv_pbcq_nest_xscom_read, + .write = pnv_pbcq_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static const MemoryRegionOps pnv_pbcq_pci_xscom_ops = { + .read = pnv_pbcq_pci_xscom_read, + .write = pnv_pbcq_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static const MemoryRegionOps pnv_pbcq_spci_xscom_ops = { + .read = pnv_pbcq_spci_xscom_read, + .write = pnv_pbcq_spci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_pbcq_default_bars(PnvPBCQState *pbcq) +{ + uint64_t mm0, mm1, reg; + PnvPHB3 *phb = pbcq->phb; + + mm0 = 0x3d00000000000ull + 0x4000000000ull * phb->chip_id + + 0x1000000000ull * phb->phb_id; + mm1 = 0x3ff8000000000ull + 0x0200000000ull * phb->chip_id + + 0x0080000000ull * phb->phb_id; + reg = 0x3fffe40000000ull + 0x0000400000ull * phb->chip_id + + 0x0000100000ull * phb->phb_id; + + pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14; + pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14; + pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14; +} + +static void pnv_pbcq_realize(DeviceState *dev, Error **errp) +{ + PnvPBCQState *pbcq = PNV_PBCQ(dev); + PnvPHB3 *phb; + char name[32]; + + assert(pbcq->phb); + phb = pbcq->phb; + + /* TODO: Fix OPAL to do that: establish default BAR values */ + pnv_pbcq_default_bars(pbcq); + + /* Initialize the XSCOM region for the PBCQ registers */ + snprintf(name, sizeof(name), "xscom-pbcq-nest-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_nest_regs, OBJECT(dev), + &pnv_pbcq_nest_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_NEST_SIZE); + snprintf(name, sizeof(name), "xscom-pbcq-pci-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_pci_regs, OBJECT(dev), + &pnv_pbcq_pci_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_PCI_SIZE); + snprintf(name, sizeof(name), "xscom-pbcq-spci-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_spci_regs, OBJECT(dev), + &pnv_pbcq_spci_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_SPCI_SIZE); +} + +static int pnv_pbcq_dt_xscom(PnvXScomInterface *dev, void *fdt, + int xscom_offset) +{ + const char compat[] = "ibm,power8-pbcq"; + PnvPHB3 *phb = PNV_PBCQ(dev)->phb; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_PBCQ_NEST_SIZE), + cpu_to_be32(PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id), + cpu_to_be32(PNV_XSCOM_PBCQ_PCI_SIZE), + cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id), + cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_SIZE) + }; + + name = g_strdup_printf("pbcq@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,phb-index", phb->phb_id))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", phb->chip_id))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, + sizeof(compat)))); + return 0; +} + +static void phb3_pbcq_instance_init(Object *obj) +{ + PnvPBCQState *pbcq = PNV_PBCQ(obj); + + object_property_add_link(obj, "phb", TYPE_PNV_PHB3, + (Object **)&pbcq->phb, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG, + &error_abort); +} + +static void pnv_pbcq_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->dt_xscom = pnv_pbcq_dt_xscom; + + dc->realize = pnv_pbcq_realize; + dc->user_creatable = false; +} + +static const TypeInfo pnv_pbcq_type_info = { + .name = TYPE_PNV_PBCQ, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPBCQState), + .instance_init = phb3_pbcq_instance_init, + .class_init = pnv_pbcq_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pbcq_register_types(void) +{ + type_register_static(&pnv_pbcq_type_info); +} + +type_init(pnv_pbcq_register_types) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c new file mode 100644 index 0000000..23cf093 --- /dev/null +++ b/hw/pci-host/pnv_phb4.c @@ -0,0 +1,1439 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/visitor.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "monitor/monitor.h" +#include "target/ppc/cpu.h" +#include "hw/pci-host/pnv_phb4_regs.h" +#include "hw/pci-host/pnv_phb4.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" + +#define phb_error(phb, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n", \ + (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) + +/* + * QEMU version of the GETFIELD/SETFIELD macros + * + * These are common with the PnvXive model. + */ +static inline uint64_t GETFIELD(uint64_t mask, uint64_t word) +{ + return (word & mask) >> ctz64(mask); +} + +static inline uint64_t SETFIELD(uint64_t mask, uint64_t word, + uint64_t value) +{ + return (word & ~mask) | ((value << ctz64(mask)) & mask); +} + +static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3]; + uint8_t bus, devfn; + + if (!(addr >> 63)) { + return NULL; + } + bus = (addr >> 52) & 0xff; + devfn = (addr >> 44) & 0xff; + + /* We don't access the root complex this way */ + if (bus == 0 && devfn == 0) { + return NULL; + } + return pci_find_device(pci->bus, bus, devfn); +} + +/* + * The CONFIG_DATA register expects little endian accesses, but as the + * region is big endian, we have to swap the value. + */ +static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off, + unsigned size, uint64_t val) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + + pdev = pnv_phb4_find_cfg_dev(phb); + if (!pdev) { + return; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return; + } + switch (size) { + case 1: + break; + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + default: + g_assert_not_reached(); + } + pci_host_config_write_common(pdev, cfg_addr, limit, val, size); +} + +static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off, + unsigned size) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + uint64_t val; + + pdev = pnv_phb4_find_cfg_dev(phb); + if (!pdev) { + return ~0ull; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return ~0ull; + } + val = pci_host_config_read_common(pdev, cfg_addr, limit, size); + switch (size) { + case 1: + return val; + case 2: + return bswap16(val); + case 4: + return bswap32(val); + default: + g_assert_not_reached(); + } +} + +/* + * Root complex register accesses are memory mapped. + */ +static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, + unsigned size, uint64_t val) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + PCIDevice *pdev; + + if (size != 4) { + phb_error(phb, "rc_config_write invalid size %d\n", size); + return; + } + + pdev = pci_find_device(pci->bus, 0, 0); + assert(pdev); + + pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE, + bswap32(val), 4); +} + +static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, + unsigned size) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + PCIDevice *pdev; + uint64_t val; + + if (size != 4) { + phb_error(phb, "rc_config_read invalid size %d\n", size); + return ~0ull; + } + + pdev = pci_find_device(pci->bus, 0, 0); + assert(pdev); + + val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4); + return bswap32(val); +} + +static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index) +{ + uint64_t base, start, size, mbe0, mbe1; + MemoryRegion *parent; + char name[64]; + + /* Unmap first */ + if (memory_region_is_mapped(&phb->mr_mmio[index])) { + /* Should we destroy it in RCU friendly way... ? */ + memory_region_del_subregion(phb->mr_mmio[index].container, + &phb->mr_mmio[index]); + } + + /* Get table entry */ + mbe0 = phb->ioda_MBT[(index << 1)]; + mbe1 = phb->ioda_MBT[(index << 1) + 1]; + + if (!(mbe0 & IODA3_MBT0_ENABLE)) { + return; + } + + /* Grab geometry from registers */ + base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12; + size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12; + size |= 0xff00000000000000ull; + size = ~size + 1; + + /* Calculate PCI side start address based on M32/M64 window type */ + if (mbe0 & IODA3_MBT0_TYPE_M32) { + start = phb->regs[PHB_M32_START_ADDR >> 3]; + if ((start + size) > 0x100000000ull) { + phb_error(phb, "M32 set beyond 4GB boundary !"); + size = 0x100000000 - start; + } + } else { + start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]); + } + + /* TODO: Figure out how to implemet/decode AOMASK */ + + /* Check if it matches an enabled MMIO region in the PEC stack */ + if (memory_region_is_mapped(&phb->stack->mmbar0) && + base >= phb->stack->mmio0_base && + (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) { + parent = &phb->stack->mmbar0; + base -= phb->stack->mmio0_base; + } else if (memory_region_is_mapped(&phb->stack->mmbar1) && + base >= phb->stack->mmio1_base && + (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) { + parent = &phb->stack->mmbar1; + base -= phb->stack->mmio1_base; + } else { + phb_error(phb, "PHB MBAR %d out of parent bounds", index); + return; + } + + /* Create alias (better name ?) */ + snprintf(name, sizeof(name), "phb4-mbar%d", index); + memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name, + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_mmio[index]); +} + +static void pnv_phb4_check_all_mbt(PnvPHB4 *phb) +{ + uint64_t i; + uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS : + PNV_PHB4_MIN_MMIO_WINDOWS; + + for (i = 0; i < num_windows; i++) { + pnv_phb4_check_mbt(phb, i); + } +} + +static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb, + unsigned *out_table, unsigned *out_idx) +{ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg); + unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg); + unsigned int mask; + uint64_t *tptr = NULL; + + switch (table) { + case IODA3_TBL_LIST: + tptr = phb->ioda_LIST; + mask = 7; + break; + case IODA3_TBL_MIST: + tptr = phb->ioda_MIST; + mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1); + mask -= 1; + break; + case IODA3_TBL_RCAM: + mask = phb->big_phb ? 127 : 63; + break; + case IODA3_TBL_MRT: + mask = phb->big_phb ? 15 : 7; + break; + case IODA3_TBL_PESTA: + case IODA3_TBL_PESTB: + mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + mask -= 1; + break; + case IODA3_TBL_TVT: + tptr = phb->ioda_TVT; + mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1); + mask -= 1; + break; + case IODA3_TBL_TCR: + case IODA3_TBL_TDR: + mask = phb->big_phb ? 1023 : 511; + break; + case IODA3_TBL_MBT: + tptr = phb->ioda_MBT; + mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1); + mask -= 1; + break; + case IODA3_TBL_MDT: + tptr = phb->ioda_MDT; + mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + mask -= 1; + break; + case IODA3_TBL_PEEV: + tptr = phb->ioda_PEEV; + mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1); + mask -= 1; + break; + default: + phb_error(phb, "invalid IODA table %d", table); + return NULL; + } + index &= mask; + if (out_idx) { + *out_idx = index; + } + if (out_table) { + *out_table = table; + } + if (tptr) { + tptr += index; + } + if (adreg & PHB_IODA_AD_AUTOINC) { + index = (index + 1) & mask; + adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index); + } + + phb->regs[PHB_IODA_ADDR >> 3] = adreg; + return tptr; +} + +static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb4_ioda_access(phb, &table, &idx); + if (!tptr) { + /* Special PESTA case */ + if (table == IODA3_TBL_PESTA) { + return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63; + } else if (table == IODA3_TBL_PESTB) { + return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62; + } + /* Return 0 on unsupported tables, not ff's */ + return 0; + } + return *tptr; +} + +static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb4_ioda_access(phb, &table, &idx); + if (!tptr) { + /* Special PESTA case */ + if (table == IODA3_TBL_PESTA) { + phb->ioda_PEST_AB[idx] &= ~1; + phb->ioda_PEST_AB[idx] |= (val >> 63) & 1; + } else if (table == IODA3_TBL_PESTB) { + phb->ioda_PEST_AB[idx] &= ~2; + phb->ioda_PEST_AB[idx] |= (val >> 62) & 2; + } + return; + } + + /* Handle side effects */ + switch (table) { + case IODA3_TBL_LIST: + break; + case IODA3_TBL_MIST: { + /* Special mask for MIST partial write */ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg); + uint64_t v = *tptr; + if (mmask == 0) { + mmask = 0xf; + } + if (mmask & 8) { + v &= 0x0000ffffffffffffull; + v |= 0xcfff000000000000ull & val; + } + if (mmask & 4) { + v &= 0xffff0000ffffffffull; + v |= 0x0000cfff00000000ull & val; + } + if (mmask & 2) { + v &= 0xffffffff0000ffffull; + v |= 0x00000000cfff0000ull & val; + } + if (mmask & 1) { + v &= 0xffffffffffff0000ull; + v |= 0x000000000000cfffull & val; + } + *tptr = val; + break; + } + case IODA3_TBL_MBT: + *tptr = val; + + /* Copy accross the valid bit to the other half */ + phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull; + phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val; + + /* Update mappings */ + pnv_phb4_check_mbt(phb, idx >> 1); + break; + default: + *tptr = val; + } +} + +static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val) +{ + PnvPhb4DMASpace *ds; + + /* Always invalidate all for now ... */ + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + ds->pe_num = PHB_INVALID_PE; + } +} + +static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds) +{ + uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3]; + + if (cfg & PHB_PHB4C_32BIT_MSI_EN) { + if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + 0xffff0000, &ds->msi32_mr); + } + } else { + if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi32_mr); + } + } + + if (cfg & PHB_PHB4C_64BIT_MSI_EN) { + if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + (1ull << 60), &ds->msi64_mr); + } + } else { + if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi64_mr); + } + } +} + +static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb) +{ + PnvPhb4DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + pnv_phb4_update_msi_regions(ds); + } +} + +static void pnv_phb4_update_xsrc(PnvPHB4 *phb) +{ + int shift, flags, i, lsi_base; + XiveSource *xsrc = &phb->xsrc; + + /* The XIVE source characteristics can be set at run time */ + if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) { + shift = XIVE_ESB_64K; + } else { + shift = XIVE_ESB_4K; + } + if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) { + flags = XIVE_SRC_STORE_EOI; + } else { + flags = 0; + } + + phb->xsrc.esb_shift = shift; + phb->xsrc.esb_flags = flags; + + lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]); + lsi_base <<= 3; + + /* TODO: handle reset values of PHB_LSI_SRC_ID */ + if (!lsi_base) { + return; + } + + /* TODO: need a xive_source_irq_reset_lsi() */ + bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs); + + for (i = 0; i < xsrc->nr_irqs; i++) { + bool msi = (i < lsi_base || i >= (lsi_base + 8)); + if (!msi) { + xive_source_irq_set_lsi(xsrc, i); + } + } +} + +static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val, + unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + bool changed; + + /* Special case outbound configuration data */ + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + pnv_phb4_config_write(phb, off & 0x3, size, val); + return; + } + + /* Special case RC configuration space */ + if ((off & 0xf800) == PHB_RC_CONFIG_BASE) { + pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val); + return; + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return; + } + + /* Handle masking */ + switch (off) { + case PHB_LSI_SOURCE_ID: + val &= PHB_LSI_SRC_ID; + break; + case PHB_M64_UPPER_BITS: + val &= 0xff00000000000000ull; + break; + /* TCE Kill */ + case PHB_TCE_KILL: + /* Clear top 3 bits which HW does to indicate successful queuing */ + val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE); + break; + case PHB_Q_DMA_R: + /* + * This is enough logic to make SW happy but we aren't + * actually quiescing the DMAs + */ + if (val & PHB_Q_DMA_R_AUTORESET) { + val = 0; + } else { + val &= PHB_Q_DMA_R_QUIESCE_DMA; + } + break; + /* LEM stuff */ + case PHB_LEM_FIR_AND_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val; + return; + case PHB_LEM_FIR_OR_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val; + return; + case PHB_LEM_ERROR_AND_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val; + return; + case PHB_LEM_ERROR_OR_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val; + return; + case PHB_LEM_WOF: + val = 0; + break; + /* TODO: More regs ..., maybe create a table with masks... */ + + /* Read only registers */ + case PHB_CPU_LOADSTORE_STATUS: + case PHB_ETU_ERR_SUMMARY: + case PHB_PHB4_GEN_CAP: + case PHB_PHB4_TCE_CAP: + case PHB_PHB4_IRQ_CAP: + case PHB_PHB4_EEH_CAP: + return; + } + + /* Record whether it changed */ + changed = phb->regs[off >> 3] != val; + + /* Store in register cache first */ + phb->regs[off >> 3] = val; + + /* Handle side effects */ + switch (off) { + case PHB_PHB4_CONFIG: + if (changed) { + pnv_phb4_update_all_msi_regions(phb); + } + break; + case PHB_M32_START_ADDR: + case PHB_M64_UPPER_BITS: + if (changed) { + pnv_phb4_check_all_mbt(phb); + } + break; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + pnv_phb4_ioda_write(phb, val); + break; + + /* RTC invalidation */ + case PHB_RTC_INVALIDATE: + pnv_phb4_rtc_invalidate(phb, val); + break; + + /* PHB Control (Affects XIVE source) */ + case PHB_CTRLR: + case PHB_LSI_SOURCE_ID: + pnv_phb4_update_xsrc(phb); + break; + + /* Silent simple writes */ + case PHB_ASN_CMPM: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + case PHB_TCE_TAG_ENABLE: + case PHB_INT_NOTIFY_ADDR: + case PHB_INT_NOTIFY_INDEX: + case PHB_DMARD_SYNC: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } +} + +static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint64_t val; + + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + return pnv_phb4_config_read(phb, off & 0x3, size); + } + + /* Special case RC configuration space */ + if ((off & 0xf800) == PHB_RC_CONFIG_BASE) { + return pnv_phb4_rc_config_read(phb, off & 0x7ff, size); + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return ~0ull; + } + + /* Default read from cache */ + val = phb->regs[off >> 3]; + + switch (off) { + case PHB_VERSION: + return phb->version; + + /* Read-only */ + case PHB_PHB4_GEN_CAP: + return 0xe4b8000000000000ull; + case PHB_PHB4_TCE_CAP: + return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull; + case PHB_PHB4_IRQ_CAP: + return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull; + case PHB_PHB4_EEH_CAP: + return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + return pnv_phb4_ioda_read(phb); + + /* Link training always appears trained */ + case PHB_PCIE_DLP_TRAIN_CTL: + /* TODO: Do something sensible with speed ? */ + return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT; + + /* DMA read sync: make it look like it's complete */ + case PHB_DMARD_SYNC: + return PHB_DMARD_SYNC_COMPLETE; + + /* Silent simple reads */ + case PHB_LSI_SOURCE_ID: + case PHB_CPU_LOADSTORE_STATUS: + case PHB_ASN_CMPM: + case PHB_PHB4_CONFIG: + case PHB_M32_START_ADDR: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_RTC_INVALIDATE: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_M64_UPPER_BITS: + case PHB_CTRLR: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + case PHB_TCE_TAG_ENABLE: + case PHB_INT_NOTIFY_ADDR: + case PHB_INT_NOTIFY_INDEX: + case PHB_Q_DMA_R: + case PHB_ETU_ERR_SUMMARY: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } + return val; +} + +static const MemoryRegionOps pnv_phb4_reg_ops = { + .read = pnv_phb4_reg_read, + .write = pnv_phb4_reg_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t reg = addr >> 3; + uint64_t val; + hwaddr offset; + + switch (reg) { + case PHB_SCOM_HV_IND_ADDR: + return phb->scom_hv_ind_addr_reg; + + case PHB_SCOM_HV_IND_DATA: + if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) { + phb_error(phb, "Invalid indirect address"); + return ~0ull; + } + size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; + offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg); + val = pnv_phb4_reg_read(phb, offset, size); + if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) { + offset += size; + offset &= 0x3fff; + phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, + phb->scom_hv_ind_addr_reg, + offset); + } + return val; + case PHB_SCOM_ETU_LEM_FIR: + case PHB_SCOM_ETU_LEM_FIR_AND: + case PHB_SCOM_ETU_LEM_FIR_OR: + case PHB_SCOM_ETU_LEM_FIR_MSK: + case PHB_SCOM_ETU_LEM_ERR_MSK_AND: + case PHB_SCOM_ETU_LEM_ERR_MSK_OR: + case PHB_SCOM_ETU_LEM_ACT0: + case PHB_SCOM_ETU_LEM_ACT1: + case PHB_SCOM_ETU_LEM_WOF: + offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; + return pnv_phb4_reg_read(phb, offset, size); + case PHB_SCOM_ETU_PMON_CONFIG: + case PHB_SCOM_ETU_PMON_CTR0: + case PHB_SCOM_ETU_PMON_CTR1: + case PHB_SCOM_ETU_PMON_CTR2: + case PHB_SCOM_ETU_PMON_CTR3: + offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; + return pnv_phb4_reg_read(phb, offset, size); + + default: + qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr); + return ~0ull; + } +} + +static void pnv_phb4_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t reg = addr >> 3; + hwaddr offset; + + switch (reg) { + case PHB_SCOM_HV_IND_ADDR: + phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff; + break; + case PHB_SCOM_HV_IND_DATA: + if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) { + phb_error(phb, "Invalid indirect address"); + break; + } + size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; + offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg); + pnv_phb4_reg_write(phb, offset, val, size); + if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) { + offset += size; + offset &= 0x3fff; + phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, + phb->scom_hv_ind_addr_reg, + offset); + } + break; + case PHB_SCOM_ETU_LEM_FIR: + case PHB_SCOM_ETU_LEM_FIR_AND: + case PHB_SCOM_ETU_LEM_FIR_OR: + case PHB_SCOM_ETU_LEM_FIR_MSK: + case PHB_SCOM_ETU_LEM_ERR_MSK_AND: + case PHB_SCOM_ETU_LEM_ERR_MSK_OR: + case PHB_SCOM_ETU_LEM_ACT0: + case PHB_SCOM_ETU_LEM_ACT1: + case PHB_SCOM_ETU_LEM_WOF: + offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; + pnv_phb4_reg_write(phb, offset, val, size); + break; + case PHB_SCOM_ETU_PMON_CONFIG: + case PHB_SCOM_ETU_PMON_CTR0: + case PHB_SCOM_ETU_PMON_CTR1: + case PHB_SCOM_ETU_PMON_CTR2: + case PHB_SCOM_ETU_PMON_CTR3: + offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; + pnv_phb4_reg_write(phb, offset, val, size); + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +const MemoryRegionOps pnv_phb4_xscom_ops = { + .read = pnv_phb4_xscom_read, + .write = pnv_phb4_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* Check that out properly ... */ + return irq_num & 3; +} + +static void pnv_phb4_set_irq(void *opaque, int irq_num, int level) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t lsi_base; + + /* LSI only ... */ + if (irq_num > 3) { + phb_error(phb, "IRQ %x is not an LSI", irq_num); + } + lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]); + lsi_base <<= 3; + qemu_set_irq(phb->qirqs[lsi_base + irq_num], level); +} + +static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds) +{ + uint64_t rtt, addr; + uint16_t rte; + int bus_num; + int num_PEs; + + /* Already resolved ? */ + if (ds->pe_num != PHB_INVALID_PE) { + return true; + } + + /* We need to lookup the RTT */ + rtt = ds->phb->regs[PHB_RTT_BAR >> 3]; + if (!(rtt & PHB_RTT_BAR_ENABLE)) { + phb_error(ds->phb, "DMA with RTT BAR disabled !"); + /* Set error bits ? fence ? ... */ + return false; + } + + /* Read RTE */ + bus_num = pci_bus_num(ds->bus); + addr = rtt & PHB_RTT_BASE_ADDRESS_MASK; + addr += 2 * ((bus_num << 8) | ds->devfn); + if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) { + phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr); + /* Set error bits ? fence ? ... */ + return false; + } + rte = be16_to_cpu(rte); + + /* Fail upon reading of invalid PE# */ + num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + if (rte >= num_PEs) { + phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte); + rte &= num_PEs - 1; + } + ds->pe_num = rte; + return true; +} + +static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr, + bool is_write, uint64_t tve, + IOMMUTLBEntry *tlb) +{ + uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve); + int32_t lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve); + uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve); + uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve); + + /* Invalid levels */ + if (lev > 4) { + phb_error(ds->phb, "Invalid #levels in TVE %d", lev); + return; + } + + /* Invalid entry */ + if (tts == 0) { + phb_error(ds->phb, "Access to invalid TVE"); + return; + } + + /* IO Page Size of 0 means untranslated, else use TCEs */ + if (tps == 0) { + /* TODO: Handle boundaries */ + + /* Use 4k pages like q35 ... for now */ + tlb->iova = addr & 0xfffffffffffff000ull; + tlb->translated_addr = addr & 0x0003fffffffff000ull; + tlb->addr_mask = 0xfffull; + tlb->perm = IOMMU_RW; + } else { + uint32_t tce_shift, tbl_shift, sh; + uint64_t base, taddr, tce, tce_mask; + + /* Address bits per bottom level TCE entry */ + tce_shift = tps + 11; + + /* Address bits per table level */ + tbl_shift = tts + 8; + + /* Top level table base address */ + base = tta << 12; + + /* Total shift to first level */ + sh = tbl_shift * lev + tce_shift; + + /* TODO: Limit to support IO page sizes */ + + /* TODO: Multi-level untested */ + while ((lev--) >= 0) { + /* Grab the TCE address */ + taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); + if (dma_memory_read(&address_space_memory, taddr, &tce, + sizeof(tce))) { + phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr); + return; + } + tce = be64_to_cpu(tce); + + /* Check permission for indirect TCE */ + if ((lev >= 0) && !(tce & 3)) { + phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr); + phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + return; + } + sh -= tbl_shift; + base = tce & ~0xfffull; + } + + /* We exit the loop with TCE being the final TCE */ + tce_mask = ~((1ull << tce_shift) - 1); + tlb->iova = addr & tce_mask; + tlb->translated_addr = tce & tce_mask; + tlb->addr_mask = ~tce_mask; + tlb->perm = tce & 3; + if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { + phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr); + phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + } + } +} + +static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr); + int tve_sel; + uint64_t tve, cfg; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + /* Resolve PE# */ + if (!pnv_phb4_resolve_pe(ds)) { + phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return ret; + } + + /* Check top bits */ + switch (addr >> 60) { + case 00: + /* DMA or 32-bit MSI ? */ + cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3]; + if ((cfg & PHB_PHB4C_32BIT_MSI_EN) && + ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) { + phb_error(ds->phb, "xlate on 32-bit MSI region"); + return ret; + } + /* Choose TVE XXX Use PHB4 Control Register */ + tve_sel = (addr >> 59) & 1; + tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel]; + pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret); + break; + case 01: + phb_error(ds->phb, "xlate on 64-bit MSI region"); + break; + default: + phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr); + } + return ret; +} + +#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region" +#define PNV_PHB4_IOMMU_MEMORY_REGION(obj) \ + OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB4_IOMMU_MEMORY_REGION) + +static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = pnv_phb4_translate_iommu; +} + +static const TypeInfo pnv_phb4_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION, + .class_init = pnv_phb4_iommu_memory_region_class_init, +}; + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + */ +static void pnv_phb4_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + PnvPhb4DMASpace *ds = opaque; + PnvPHB4 *phb = ds->phb; + + uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f); + + /* Resolve PE# */ + if (!pnv_phb4_resolve_pe(ds)) { + phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return; + } + + /* TODO: Check it doesn't collide with LSIs */ + if (src >= phb->xsrc.nr_irqs) { + phb_error(phb, "MSI %d out of bounds", src); + return; + } + + /* TODO: check PE/MSI assignement */ + + qemu_irq_pulse(phb->qirqs[src]); +} + +/* There is no .read as the read result is undefined by PCI spec */ +static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPhb4DMASpace *ds = opaque; + + phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr); + return -1; +} + +static const MemoryRegionOps pnv_phb4_msi_ops = { + .read = pnv_phb4_msi_read, + .write = pnv_phb4_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn) +{ + PnvPhb4DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + if (ds->bus == bus && ds->devfn == devfn) { + break; + } + } + return ds; +} + +static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + PnvPHB4 *phb = opaque; + PnvPhb4DMASpace *ds; + char name[32]; + + ds = pnv_phb4_dma_find(phb, bus, devfn); + + if (ds == NULL) { + ds = g_malloc0(sizeof(PnvPhb4DMASpace)); + ds->bus = bus; + ds->devfn = devfn; + ds->pe_num = PHB_INVALID_PE; + ds->phb = phb; + snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id, + phb->phb_id); + memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr), + TYPE_PNV_PHB4_IOMMU_MEMORY_REGION, + OBJECT(phb), name, UINT64_MAX); + address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr), + name); + memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops, + ds, "msi32", 0x10000); + memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops, + ds, "msi64", 0x100000); + pnv_phb4_update_msi_regions(ds); + + QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list); + } + return &ds->dma_as; +} + +static void pnv_phb4_instance_init(Object *obj) +{ + PnvPHB4 *phb = PNV_PHB4(obj); + + QLIST_INIT(&phb->dma_spaces); + + /* XIVE interrupt source object */ + object_initialize_child(obj, "source", &phb->xsrc, sizeof(XiveSource), + TYPE_XIVE_SOURCE, &error_abort, NULL); + + /* Root Port */ + object_initialize_child(obj, "root", &phb->root, sizeof(phb->root), + TYPE_PNV_PHB4_ROOT_PORT, &error_abort, NULL); + + qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); + qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); +} + +static void pnv_phb4_realize(DeviceState *dev, Error **errp) +{ + PnvPHB4 *phb = PNV_PHB4(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + XiveSource *xsrc = &phb->xsrc; + Error *local_err = NULL; + int nr_irqs; + char name[32]; + + assert(phb->stack); + + /* Set the "big_phb" flag */ + phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3; + + /* Controller Registers */ + snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id, + phb->phb_id); + memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb, + name, 0x2000); + + /* + * PHB4 doesn't support IO space. However, qemu gets very upset if + * we don't have an IO region to anchor IO BARs onto so we just + * initialize one which we never hook up to anything + */ + + snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id, + phb->phb_id); + memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000); + + snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id, + phb->phb_id); + memory_region_init(&phb->pci_mmio, OBJECT(phb), name, + PCI_MMIO_TOTAL_SIZE); + + pci->bus = pci_register_root_bus(dev, "root-bus", + pnv_phb4_set_irq, pnv_phb4_map_irq, phb, + &phb->pci_mmio, &phb->pci_io, + 0, 4, TYPE_PNV_PHB4_ROOT_BUS); + pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); + + /* Add a single Root port */ + qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); + qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); + qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus)); + qdev_init_nofail(DEVICE(&phb->root)); + + /* Setup XIVE Source */ + if (phb->big_phb) { + nr_irqs = PNV_PHB4_MAX_INTs; + } else { + nr_irqs = PNV_PHB4_MAX_INTs >> 1; + } + object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal); + object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal); + object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_phb4_update_xsrc(phb); + + phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs); +} + +static void pnv_phb4_reset(DeviceState *dev) +{ + PnvPHB4 *phb = PNV_PHB4(dev); + PCIDevice *root_dev = PCI_DEVICE(&phb->root); + + /* + * Configure PCI device id at reset using a property. + */ + pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM); + pci_config_set_device_id(root_dev->config, phb->device_id); +} + +static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PnvPHB4 *phb = PNV_PHB4(host_bridge); + + snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x", + phb->chip_id, phb->phb_id); + return phb->bus_path; +} + +static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno) +{ + PnvPHB4 *phb = PNV_PHB4(xf); + uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3]; + uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3]; + uint64_t data = XIVE_TRIGGER_PQ | offset | srcno; + MemTxResult result; + + address_space_stq_be(&address_space_memory, notif_port, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port); + return; + } +} + +static Property pnv_phb4_properties[] = { + DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), + DEFINE_PROP_UINT64("version", PnvPHB4, version, 0), + DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0), + DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK, + PnvPhb4PecStack *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_phb4_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass); + + hc->root_bus_path = pnv_phb4_root_bus_path; + dc->realize = pnv_phb4_realize; + device_class_set_props(dc, pnv_phb4_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->user_creatable = false; + dc->reset = pnv_phb4_reset; + + xfc->notify = pnv_phb4_xive_notify; +} + +static const TypeInfo pnv_phb4_type_info = { + .name = TYPE_PNV_PHB4, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_init = pnv_phb4_instance_init, + .instance_size = sizeof(PnvPHB4), + .class_init = pnv_phb4_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_NOTIFIER }, + { }, + } +}; + +static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + /* + * PHB4 has only a single root complex. Enforce the limit on the + * parent bus + */ + k->max_dev = 1; +} + +static const TypeInfo pnv_phb4_root_bus_info = { + .name = TYPE_PNV_PHB4_ROOT_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pnv_phb4_root_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static void pnv_phb4_root_port_reset(DeviceState *dev) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + PCIDevice *d = PCI_DEVICE(dev); + uint8_t *conf = d->config; + + rpc->parent_reset(dev); + + pci_byte_test_and_set_mask(conf + PCI_IO_BASE, + PCI_IO_RANGE_MASK & 0xff); + pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT, + PCI_IO_RANGE_MASK & 0xff); + pci_set_word(conf + PCI_MEMORY_BASE, 0); + pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0); + pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1); + pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1); + pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */ + pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff); +} + +static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); + + dc->desc = "IBM PHB4 PCIE Root Port"; + dc->user_creatable = false; + + device_class_set_parent_realize(dc, pnv_phb4_root_port_realize, + &rpc->parent_realize); + device_class_set_parent_reset(dc, pnv_phb4_root_port_reset, + &rpc->parent_reset); + + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = PNV_PHB4_DEVICE_ID; + k->revision = 0; + + rpc->exp_offset = 0x48; + rpc->aer_offset = 0x100; + + dc->reset = &pnv_phb4_root_port_reset; +} + +static const TypeInfo pnv_phb4_root_port_info = { + .name = TYPE_PNV_PHB4_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(PnvPHB4RootPort), + .class_init = pnv_phb4_root_port_class_init, +}; + +static void pnv_phb4_register_types(void) +{ + type_register_static(&pnv_phb4_root_bus_info); + type_register_static(&pnv_phb4_root_port_info); + type_register_static(&pnv_phb4_type_info); + type_register_static(&pnv_phb4_iommu_memory_region_info); +} + +type_init(pnv_phb4_register_types); + +void pnv_phb4_update_regions(PnvPhb4PecStack *stack) +{ + PnvPHB4 *phb = &stack->phb; + + /* Unmap first always */ + if (memory_region_is_mapped(&phb->mr_regs)) { + memory_region_del_subregion(&stack->phbbar, &phb->mr_regs); + } + if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) { + memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio); + } + + /* Map registers if enabled */ + if (memory_region_is_mapped(&stack->phbbar)) { + memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs); + } + + /* Map ESB if enabled */ + if (memory_region_is_mapped(&stack->intbar)) { + memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio); + } + + /* Check/update m32 */ + pnv_phb4_check_all_mbt(phb); +} + +void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon) +{ + uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3]; + + monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n", + phb->chip_id, phb->phb_id, + offset, offset + phb->xsrc.nr_irqs - 1); + xive_source_pic_print_info(&phb->xsrc, 0, mon); +} diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c new file mode 100644 index 0000000..68e1db3 --- /dev/null +++ b/hw/pci-host/pnv_phb4_pec.c @@ -0,0 +1,595 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/log.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/pci-host/pnv_phb4_regs.h" +#include "hw/pci-host/pnv_phb4.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/ppc/pnv.h" +#include "hw/qdev-properties.h" + +#include <libfdt.h> + +#define phb_pec_error(pec, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n", \ + (pec)->chip_id, (pec)->index, ## __VA_ARGS__) + + +static uint64_t pnv_pec_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return pec->nest_regs[reg]; +} + +static void pnv_pec_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_NEST_PBCQ_HW_CONFIG: + case PEC_NEST_DROP_PRIO_CTRL: + case PEC_NEST_PBCQ_ERR_INJECT: + case PEC_NEST_PCI_NEST_CLK_TRACE_CTL: + case PEC_NEST_PBCQ_PMON_CTRL: + case PEC_NEST_PBCQ_PBUS_ADDR_EXT: + case PEC_NEST_PBCQ_PRED_VEC_TIMEOUT: + case PEC_NEST_CAPP_CTRL: + case PEC_NEST_PBCQ_READ_STK_OVR: + case PEC_NEST_PBCQ_WRITE_STK_OVR: + case PEC_NEST_PBCQ_STORE_STK_OVR: + case PEC_NEST_PBCQ_RETRY_BKOFF_CTRL: + pec->nest_regs[reg] = val; + break; + default: + phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pec_nest_xscom_ops = { + .read = pnv_pec_nest_xscom_read, + .write = pnv_pec_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return pec->pci_regs[reg]; +} + +static void pnv_pec_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_PCI_PBAIB_HW_CONFIG: + case PEC_PCI_PBAIB_READ_STK_OVR: + pec->pci_regs[reg] = val; + break; + default: + phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pec_pci_xscom_ops = { + .read = pnv_pec_pci_xscom_read, + .write = pnv_pec_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->nest_regs[reg]; +} + +static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack) +{ + PnvPhb4PecState *pec = stack->pec; + MemoryRegion *sysmem = pec->system_memory; + uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN]; + uint64_t bar, mask, size; + char name[64]; + + /* + * NOTE: This will really not work well if those are remapped + * after the PHB has created its sub regions. We could do better + * if we had a way to resize regions but we don't really care + * that much in practice as the stuff below really only happens + * once early during boot + */ + + /* Handle unmaps */ + if (memory_region_is_mapped(&stack->mmbar0) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + memory_region_del_subregion(sysmem, &stack->mmbar0); + } + if (memory_region_is_mapped(&stack->mmbar1) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + memory_region_del_subregion(sysmem, &stack->mmbar1); + } + if (memory_region_is_mapped(&stack->phbbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + memory_region_del_subregion(sysmem, &stack->phbbar); + } + if (memory_region_is_mapped(&stack->intbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_INT)) { + memory_region_del_subregion(sysmem, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); + + /* Handle maps */ + if (!memory_region_is_mapped(&stack->mmbar0) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->mmbar0, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar0); + stack->mmio0_base = bar; + stack->mmio0_size = size; + } + if (!memory_region_is_mapped(&stack->mmbar1) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->mmbar1, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar1); + stack->mmio1_base = bar; + stack->mmio1_size = size; + } + if (!memory_region_is_mapped(&stack->phbbar) && + (bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8; + size = PNV_PHB4_NUM_REGS << 3; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->phbbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->phbbar); + } + if (!memory_region_is_mapped(&stack->intbar) && + (bar_en & PEC_NEST_STK_BAR_EN_INT)) { + bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8; + size = PNV_PHB4_MAX_INTs << 16; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int", + stack->pec->chip_id, stack->pec->index, stack->stack_no); + memory_region_init(&stack->intbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); +} + +static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + PnvPhb4PecState *pec = stack->pec; + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_NEST_STK_PCI_NEST_FIR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_CLR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_SET: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSK: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKC: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKS: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_ACT0: + case PEC_NEST_STK_PCI_NEST_FIR_ACT1: + stack->nest_regs[reg] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_WOF: + stack->nest_regs[reg] = 0; + break; + case PEC_NEST_STK_ERR_REPORT_0: + case PEC_NEST_STK_ERR_REPORT_1: + case PEC_NEST_STK_PBCQ_GNRL_STATUS: + /* Flag error ? */ + break; + case PEC_NEST_STK_PBCQ_MODE: + stack->nest_regs[reg] = val & 0xff00000000000000ull; + break; + case PEC_NEST_STK_MMIO_BAR0: + case PEC_NEST_STK_MMIO_BAR0_MASK: + case PEC_NEST_STK_MMIO_BAR1: + case PEC_NEST_STK_MMIO_BAR1_MASK: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & + (PEC_NEST_STK_BAR_EN_MMIO0 | + PEC_NEST_STK_BAR_EN_MMIO1)) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffff000000ull; + break; + case PEC_NEST_STK_PHB_REGS_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffffc00000ull; + break; + case PEC_NEST_STK_INT_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xfffffff000000000ull; + break; + case PEC_NEST_STK_BAR_EN: + stack->nest_regs[reg] = val & 0xf000000000000000ull; + pnv_pec_stk_update_map(stack); + break; + case PEC_NEST_STK_DATA_FRZ_TYPE: + case PEC_NEST_STK_PBCQ_TUN_BAR: + /* Not used for now */ + stack->nest_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = { + .read = pnv_pec_stk_nest_xscom_read, + .write = pnv_pec_stk_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->pci_regs[reg]; +} + +static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_PCI_STK_PCI_FIR: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_CLR: + stack->nest_regs[PEC_PCI_STK_PCI_FIR] &= val; + break; + case PEC_PCI_STK_PCI_FIR_SET: + stack->nest_regs[PEC_PCI_STK_PCI_FIR] |= val; + break; + case PEC_PCI_STK_PCI_FIR_MSK: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_MSKC: + stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val; + break; + case PEC_PCI_STK_PCI_FIR_MSKS: + stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val; + break; + case PEC_PCI_STK_PCI_FIR_ACT0: + case PEC_PCI_STK_PCI_FIR_ACT1: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_WOF: + stack->nest_regs[reg] = 0; + break; + case PEC_PCI_STK_ETU_RESET: + stack->nest_regs[reg] = val & 0x8000000000000000ull; + /* TODO: Implement reset */ + break; + case PEC_PCI_STK_PBAIB_ERR_REPORT: + break; + case PEC_PCI_STK_PBAIB_TX_CMD_CRED: + case PEC_PCI_STK_PBAIB_TX_DAT_CRED: + stack->nest_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = { + .read = pnv_pec_stk_pci_xscom_read, + .write = pnv_pec_stk_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_pec_instance_init(Object *obj) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(obj); + int i; + + for (i = 0; i < PHB4_PEC_MAX_STACKS; i++) { + object_initialize_child(obj, "stack[*]", &pec->stacks[i], + sizeof(pec->stacks[i]), TYPE_PNV_PHB4_PEC_STACK, + &error_abort, NULL); + } +} + +static void pnv_pec_realize(DeviceState *dev, Error **errp) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(dev); + Error *local_err = NULL; + char name[64]; + int i; + + assert(pec->system_memory); + + /* Create stacks */ + for (i = 0; i < pec->num_stacks; i++) { + PnvPhb4PecStack *stack = &pec->stacks[i]; + Object *stk_obj = OBJECT(stack); + + object_property_set_int(stk_obj, i, "stack-no", &error_abort); + object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort); + object_property_set_bool(stk_obj, true, "realized", errp); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + /* Initialize the XSCOM regions for the PEC registers */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest", pec->chip_id, + pec->index); + pnv_xscom_region_init(&pec->nest_regs_mr, OBJECT(dev), + &pnv_pec_nest_xscom_ops, pec, name, + PHB4_PEC_NEST_REGS_COUNT); + + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci", pec->chip_id, + pec->index); + pnv_xscom_region_init(&pec->pci_regs_mr, OBJECT(dev), + &pnv_pec_pci_xscom_ops, pec, name, + PHB4_PEC_PCI_REGS_COUNT); +} + +static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, + int xscom_offset) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(dev); + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(dev); + uint32_t nbase = pecc->xscom_nest_base(pec); + uint32_t pbase = pecc->xscom_pci_base(pec); + int offset, i; + char *name; + uint32_t reg[] = { + cpu_to_be32(nbase), + cpu_to_be32(pecc->xscom_nest_size), + cpu_to_be32(pbase), + cpu_to_be32(pecc->xscom_pci_size), + }; + + name = g_strdup_printf("pbcq@%x", nbase); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pec-index", pec->index))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0))); + _FDT((fdt_setprop(fdt, offset, "compatible", pecc->compat, + pecc->compat_size))); + + for (i = 0; i < pec->num_stacks; i++) { + PnvPhb4PecStack *stack = &pec->stacks[i]; + PnvPHB4 *phb = &stack->phb; + int stk_offset; + + name = g_strdup_printf("stack@%x", i); + stk_offset = fdt_add_subnode(fdt, offset, name); + _FDT(stk_offset); + g_free(name); + _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat, + pecc->stk_compat_size))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id))); + } + + return 0; +} + +static Property pnv_pec_properties[] = { + DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), + DEFINE_PROP_UINT32("num-stacks", PnvPhb4PecState, num_stacks, 0), + DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), + DEFINE_PROP_LINK("system-memory", PnvPhb4PecState, system_memory, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + +static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec) +{ + return PNV9_XSCOM_PEC_PCI_BASE + 0x1000000 * pec->index; +} + +static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec) +{ + return PNV9_XSCOM_PEC_NEST_BASE + 0x400 * pec->index; +} + +static void pnv_pec_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_CLASS(klass); + static const char compat[] = "ibm,power9-pbcq"; + static const char stk_compat[] = "ibm,power9-phb-stack"; + + xdc->dt_xscom = pnv_pec_dt_xscom; + + dc->realize = pnv_pec_realize; + device_class_set_props(dc, pnv_pec_properties); + dc->user_creatable = false; + + pecc->xscom_nest_base = pnv_pec_xscom_nest_base; + pecc->xscom_pci_base = pnv_pec_xscom_pci_base; + pecc->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE; + pecc->xscom_pci_size = PNV9_XSCOM_PEC_PCI_SIZE; + pecc->compat = compat; + pecc->compat_size = sizeof(compat); + pecc->stk_compat = stk_compat; + pecc->stk_compat_size = sizeof(stk_compat); +} + +static const TypeInfo pnv_pec_type_info = { + .name = TYPE_PNV_PHB4_PEC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPhb4PecState), + .instance_init = pnv_pec_instance_init, + .class_init = pnv_pec_class_init, + .class_size = sizeof(PnvPhb4PecClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pec_stk_instance_init(Object *obj) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(obj); + + object_initialize_child(obj, "phb", &stack->phb, sizeof(stack->phb), + TYPE_PNV_PHB4, &error_abort, NULL); +} + +static void pnv_pec_stk_realize(DeviceState *dev, Error **errp) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev); + PnvPhb4PecState *pec = stack->pec; + char name[64]; + + assert(pec); + + /* Initialize the XSCOM regions for the stack registers */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack), + &pnv_pec_stk_nest_xscom_ops, stack, name, + PHB4_PEC_NEST_STK_REGS_COUNT); + + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack), + &pnv_pec_stk_pci_xscom_ops, stack, name, + PHB4_PEC_PCI_STK_REGS_COUNT); + + /* PHB pass-through */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(&stack->phb), + &pnv_phb4_xscom_ops, &stack->phb, name, 0x40); + + /* + * Let the machine/chip realize the PHB object to customize more + * easily some fields + */ +} + +static Property pnv_pec_stk_properties[] = { + DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0), + DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC, + PnvPhb4PecState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_pec_stk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, pnv_pec_stk_properties); + dc->realize = pnv_pec_stk_realize; + dc->user_creatable = false; + + /* TODO: reset regs ? */ +} + +static const TypeInfo pnv_pec_stk_type_info = { + .name = TYPE_PNV_PHB4_PEC_STACK, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPhb4PecStack), + .instance_init = pnv_pec_stk_instance_init, + .class_init = pnv_pec_stk_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pec_register_types(void) +{ + type_register_static(&pnv_pec_type_info); + type_register_static(&pnv_pec_stk_type_info); +} + +type_init(pnv_pec_register_types); diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index e27efe9..354828b 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -135,6 +135,8 @@ config XIVE_SPAPR default y depends on PSERIES select XIVE + select PCI + select PCIE_PORT config XIVE_KVM bool diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index e61994c..139c857 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -40,6 +40,7 @@ #include "hw/intc/intc.h" #include "hw/ipmi/ipmi.h" #include "target/ppc/mmu-hash64.h" +#include "hw/pci/msi.h" #include "hw/ppc/xics.h" #include "hw/qdev-properties.h" @@ -615,16 +616,29 @@ static ISABus *pnv_isa_create(PnvChip *chip, Error **errp) static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon) { Pnv8Chip *chip8 = PNV8_CHIP(chip); + int i; ics_pic_print_info(&chip8->psi.ics, mon); + for (i = 0; i < chip->num_phbs; i++) { + pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon); + ics_pic_print_info(&chip8->phbs[i].lsis, mon); + } } static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon) { Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; pnv_xive_pic_print_info(&chip9->xive, mon); pnv_psi_pic_print_info(&chip9->psi, mon); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + for (j = 0; j < pec->num_stacks; j++) { + pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon); + } + } } static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip, @@ -716,7 +730,7 @@ static void pnv_init(MachineState *machine) exit(1); } - fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); + fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE); if (fw_size < 0) { error_report("Could not load OPAL firmware '%s'", fw_filename); exit(1); @@ -748,6 +762,9 @@ static void pnv_init(MachineState *machine) } } + /* MSIs are supported on this platform */ + msi_nonbroken = true; + /* * Check compatibility of the specified CPU with the machine * default. @@ -1014,7 +1031,10 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, static void pnv_chip_power8_instance_init(Object *obj) { + PnvChip *chip = PNV_CHIP(obj); Pnv8Chip *chip8 = PNV8_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; object_property_add_link(obj, "xics", TYPE_XICS_FABRIC, (Object **)&chip8->xics, @@ -1033,6 +1053,17 @@ static void pnv_chip_power8_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip8->homer, sizeof(chip8->homer), TYPE_PNV8_HOMER, &error_abort, NULL); + + for (i = 0; i < pcc->num_phbs; i++) { + object_initialize_child(obj, "phb[*]", &chip8->phbs[i], + sizeof(chip8->phbs[i]), TYPE_PNV_PHB3, + &error_abort, NULL); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; } static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) @@ -1071,6 +1102,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) Pnv8Chip *chip8 = PNV8_CHIP(dev); Pnv8Psi *psi8 = &chip8->psi; Error *local_err = NULL; + int i; assert(chip8->xics); @@ -1151,6 +1183,33 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) /* Homer mmio region */ memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip), &chip8->homer.regs); + + /* PHB3 controllers */ + for (i = 0; i < chip->num_phbs; i++) { + PnvPHB3 *phb = &chip8->phbs[i]; + PnvPBCQState *pbcq = &phb->pbcq; + + object_property_set_int(OBJECT(phb), i, "index", &error_fatal); + object_property_set_int(OBJECT(phb), chip->chip_id, "chip-id", + &error_fatal); + object_property_set_bool(OBJECT(phb), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qdev_set_parent_bus(DEVICE(phb), sysbus_get_default()); + + /* Populate the XSCOM address space. */ + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_nest_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_pci_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id, + &pbcq->xscom_spci_regs); + } } static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr) @@ -1166,6 +1225,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1189,6 +1249,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1212,6 +1273,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1230,7 +1292,10 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) static void pnv_chip_power9_instance_init(Object *obj) { + PnvChip *chip = PNV_CHIP(obj); Pnv9Chip *chip9 = PNV9_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; object_initialize_child(obj, "xive", &chip9->xive, sizeof(chip9->xive), TYPE_PNV_XIVE, &error_abort, NULL); @@ -1248,6 +1313,17 @@ static void pnv_chip_power9_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip9->homer, sizeof(chip9->homer), TYPE_PNV9_HOMER, &error_abort, NULL); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + object_initialize_child(obj, "pec[*]", &chip9->pecs[i], + sizeof(chip9->pecs[i]), TYPE_PNV_PHB4_PEC, + &error_abort, NULL); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; } static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) @@ -1276,6 +1352,78 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) } } +static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + Error *local_err = NULL; + int i, j; + int phb_id = 0; + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + uint32_t pec_nest_base; + uint32_t pec_pci_base; + + object_property_set_int(OBJECT(pec), i, "index", &error_fatal); + /* + * PEC0 -> 1 stack + * PEC1 -> 2 stacks + * PEC2 -> 3 stacks + */ + object_property_set_int(OBJECT(pec), i + 1, "num-stacks", + &error_fatal); + object_property_set_int(OBJECT(pec), chip->chip_id, "chip-id", + &error_fatal); + object_property_set_link(OBJECT(pec), OBJECT(get_system_memory()), + "system-memory", &error_abort); + object_property_set_bool(OBJECT(pec), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pec_nest_base = pecc->xscom_nest_base(pec); + pec_pci_base = pecc->xscom_pci_base(pec); + + pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr); + pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr); + + for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs; + j++, phb_id++) { + PnvPhb4PecStack *stack = &pec->stacks[j]; + Object *obj = OBJECT(&stack->phb); + + object_property_set_int(obj, phb_id, "index", &error_fatal); + object_property_set_int(obj, chip->chip_id, "chip-id", + &error_fatal); + object_property_set_int(obj, PNV_PHB4_VERSION, "version", + &error_fatal); + object_property_set_int(obj, PNV_PHB4_DEVICE_ID, "device-id", + &error_fatal); + object_property_set_link(obj, OBJECT(stack), "stack", &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qdev_set_parent_bus(DEVICE(obj), sysbus_get_default()); + + /* Populate the XSCOM address space. */ + pnv_xscom_add_subregion(chip, + pec_nest_base + 0x40 * (stack->stack_no + 1), + &stack->nest_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + 0x40 * (stack->stack_no + 1), + &stack->pci_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + + 0x40 * stack->stack_no, + &stack->phb_regs_mr); + } + } +} + static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); @@ -1378,6 +1526,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) /* Homer mmio region */ memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip), &chip9->homer.regs); + + /* PHBs */ + pnv_chip_power9_phb_realize(chip, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr) @@ -1404,6 +1559,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->xscom_core_base = pnv_chip_power9_xscom_core_base; k->xscom_pcba = pnv_chip_power9_xscom_pcba; dc->desc = "PowerNV Chip POWER9"; + k->num_phbs = 6; device_class_set_parent_realize(dc, pnv_chip_power9_realize, &k->parent_realize); @@ -1533,6 +1689,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); const char *typename = pnv_chip_core_typename(chip); int i, core_hwid; + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); if (!object_class_by_name(typename)) { error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); @@ -1571,6 +1728,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) object_property_set_int(OBJECT(pnv_core), pcc->core_pir(chip, core_hwid), "pir", &error_fatal); + object_property_set_int(OBJECT(pnv_core), pnv->fw_load_addr, + "hrmor", &error_fatal); object_property_set_link(OBJECT(pnv_core), OBJECT(chip), "chip", &error_abort); object_property_set_bool(OBJECT(pnv_core), true, "realized", @@ -1605,6 +1764,7 @@ static Property pnv_chip_properties[] = { DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), + DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -1638,14 +1798,23 @@ PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir) static ICSState *pnv_ics_get(XICSFabric *xi, int irq) { PnvMachineState *pnv = PNV_MACHINE(xi); - int i; + int i, j; for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); if (ics_valid_irq(&chip8->psi.ics, irq)) { return &chip8->psi.ics; } + for (j = 0; j < chip->num_phbs; j++) { + if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) { + return &chip8->phbs[j].lsis; + } + if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) { + return ICS(&chip8->phbs[j].msis); + } + } } return NULL; } @@ -1653,11 +1822,17 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq) static void pnv_ics_resend(XICSFabric *xi) { PnvMachineState *pnv = PNV_MACHINE(xi); - int i; + int i, j; for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + ics_resend(&chip8->psi.ics); + for (j = 0; j < chip->num_phbs; j++) { + ics_resend(&chip8->phbs[j].lsis); + ics_resend(ICS(&chip8->phbs[j].msis)); + } } } @@ -1767,6 +1942,22 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) pmc->dt_power_mgt = pnv_dt_power_mgt; } +static bool pnv_machine_get_hb(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + return !!pnv->fw_load_addr; +} + +static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + if (value) { + pnv->fw_load_addr = 0x8000000; + } +} + static void pnv_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1786,6 +1977,13 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data) */ mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE; ispc->print_info = pnv_pic_print_info; + + object_class_property_add_bool(oc, "hb-mode", + pnv_machine_get_hb, pnv_machine_set_hb, + &error_abort); + object_class_property_set_description(oc, "hb-mode", + "Use a hostboot like boot loader", + NULL); } #define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \ diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 8ca5fbd..2345620 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -40,11 +40,11 @@ static const char *pnv_core_cpu_typename(PnvCore *pc) return cpu_type; } -static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip) +static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); cpu_reset(cs); @@ -56,7 +56,9 @@ static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip) env->nip = 0x10; env->msr |= MSR_HVB; /* Hypervisor mode */ - pcc->intc_reset(chip, cpu); + env->spr[SPR_HRMOR] = pc->hrmor; + + pcc->intc_reset(pc->chip, cpu); } /* @@ -162,14 +164,14 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp) +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) { CPUPPCState *env = &cpu->env; int core_pir; int thread_index = 0; /* TODO: TCG supports only one thread */ ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; Error *local_err = NULL; - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { @@ -177,13 +179,13 @@ static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp) return; } - pcc->intc_create(chip, cpu, &local_err); + pcc->intc_create(pc->chip, cpu, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort); + core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); /* * The PIR of a thread is the core PIR + the thread index. We will @@ -203,7 +205,7 @@ static void pnv_core_reset(void *dev) int i; for (i = 0; i < cc->nr_threads; i++) { - pnv_core_cpu_reset(pc->threads[i], pc->chip); + pnv_core_cpu_reset(pc, pc->threads[i]); } } @@ -231,8 +233,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) snprintf(name, sizeof(name), "thread[%d]", i); object_property_add_child(OBJECT(pc), name, obj, &error_abort); - object_property_add_alias(obj, "core-pir", OBJECT(pc), - "pir", &error_abort); cpu->machine_data = g_new0(PnvCPUState, 1); @@ -240,7 +240,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) } for (j = 0; j < cc->nr_threads; j++) { - pnv_core_cpu_realize(pc->threads[j], pc->chip, &local_err); + pnv_core_cpu_realize(pc, pc->threads[j], &local_err); if (local_err) { goto err; } @@ -263,12 +263,12 @@ err: error_propagate(errp, local_err); } -static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip) +static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu) { PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); - pcc->intc_destroy(chip, cpu); + pcc->intc_destroy(pc->chip, cpu); cpu_remove_sync(CPU(cpu)); cpu->machine_data = NULL; g_free(pnv_cpu); @@ -284,13 +284,14 @@ static void pnv_core_unrealize(DeviceState *dev, Error **errp) qemu_unregister_reset(pnv_core_reset, pc); for (i = 0; i < cc->nr_threads; i++) { - pnv_core_cpu_unrealize(pc->threads[i], pc->chip); + pnv_core_cpu_unrealize(pc, pc->threads[i]); } g_free(pc->threads); } static Property pnv_core_properties[] = { DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), }; @@ -324,6 +325,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data) dc->realize = pnv_core_realize; dc->unrealize = pnv_core_unrealize; device_class_set_props(dc, pnv_core_properties); + dc->user_creatable = false; } #define DEFINE_PNV_CORE_TYPE(family, cpu_model) \ @@ -422,6 +424,7 @@ static void pnv_quad_class_init(ObjectClass *oc, void *data) dc->realize = pnv_quad_realize; device_class_set_props(dc, pnv_quad_properties); + dc->user_creatable = false; } static const TypeInfo pnv_quad_info = { diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c index 93ae42f..9a26262 100644 --- a/hw/ppc/pnv_homer.c +++ b/hw/ppc/pnv_homer.c @@ -360,6 +360,7 @@ static void pnv_homer_class_init(ObjectClass *klass, void *data) dc->realize = pnv_homer_realize; dc->desc = "PowerNV HOMER Memory"; device_class_set_props(dc, pnv_homer_properties); + dc->user_creatable = false; } static const TypeInfo pnv_homer_type_info = { diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index 22b2055..5989d72 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -762,6 +762,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data) dc->realize = pnv_lpc_realize; dc->desc = "PowerNV LPC Controller"; device_class_set_props(dc, pnv_lpc_properties); + dc->user_creatable = false; } static const TypeInfo pnv_lpc_info = { @@ -825,6 +826,7 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) qemu_irq *irqs; qemu_irq_handler handler; PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + bool hostboot_mode = !!pnv->fw_load_addr; /* let isa_bus_new() create its own bridge on SysBus otherwise * devices speficied on the command line won't find the bus and @@ -859,7 +861,9 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) * Start disabled. The HIOMAP protocol will activate the mapping * with HIOMAP_C_CREATE_WRITE_WINDOW */ - memory_region_set_enabled(&pnv->pnor->mmio, false); + if (!hostboot_mode) { + memory_region_set_enabled(&pnv->pnor->mmio, false); + } return isa_bus; } diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c index 2173fac..5a716c2 100644 --- a/hw/ppc/pnv_occ.c +++ b/hw/ppc/pnv_occ.c @@ -280,6 +280,7 @@ static void pnv_occ_class_init(ObjectClass *klass, void *data) dc->realize = pnv_occ_realize; dc->desc = "PowerNV OCC Controller"; device_class_set_props(dc, pnv_occ_properties); + dc->user_creatable = false; } static const TypeInfo pnv_occ_type_info = { diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c index f761d8d..c365ee5 100644 --- a/hw/ppc/pnv_pnor.c +++ b/hw/ppc/pnv_pnor.c @@ -11,6 +11,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "hw/loader.h" @@ -46,7 +47,8 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size) ret = blk_pwrite(s->blk, offset, s->storage + offset, offset_end - offset, 0); if (ret < 0) { - error_report("Could not update PNOR: %s", strerror(-ret)); + error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset, + strerror(-ret)); } } @@ -111,7 +113,7 @@ static void pnv_pnor_realize(DeviceState *dev, Error **errp) } static Property pnv_pnor_properties[] = { - DEFINE_PROP_INT64("size", PnvPnor, size, 128 << 20), + DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB), DEFINE_PROP_DRIVE("drive", PnvPnor, blk), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 4c5fa29..4a11fb1 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1490,24 +1490,6 @@ int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn), } /*****************************************************************************/ -/* Debug port */ -void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val) -{ - addr &= 0xF; - switch (addr) { - case 0: - printf("%c", val); - break; - case 1: - printf("\n"); - fflush(stdout); - break; - case 2: - printf("Set loglevel to %04" PRIx32 "\n", val); - qemu_set_log(val | 0x100); - break; - } -} int ppc_cpu_pir(PowerPCCPU *cpu) { diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 862345c..111cc80 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -42,7 +42,7 @@ #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" -#include "hw/net/ne2000-isa.h" +#include "hw/qdev-properties.h" #include "sysemu/arch_init.h" #include "sysemu/kvm.h" #include "sysemu/qtest.h" @@ -60,178 +60,9 @@ #define CFG_ADDR 0xf0000510 -#define BIOS_SIZE (1 * MiB) -#define BIOS_FILENAME "ppc_rom.bin" #define KERNEL_LOAD_ADDR 0x01000000 #define INITRD_LOAD_ADDR 0x01800000 -/* Constants for devices init */ -static const int ide_iobase[2] = { 0x1f0, 0x170 }; -static const int ide_iobase2[2] = { 0x3f6, 0x376 }; -static const int ide_irq[2] = { 13, 13 }; - -#define NE2000_NB_MAX 6 - -static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 }; -static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 }; - -/* ISA IO ports bridge */ -#define PPC_IO_BASE 0x80000000 - -/* Fake super-io ports for PREP platform (Intel 82378ZB) */ -typedef struct sysctrl_t { - qemu_irq reset_irq; - Nvram *nvram; - uint8_t state; - uint8_t syscontrol; - int contiguous_map; - qemu_irq contiguous_map_irq; - int endian; -} sysctrl_t; - -enum { - STATE_HARDFILE = 0x01, -}; - -static sysctrl_t *sysctrl; - -static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val) -{ - sysctrl_t *sysctrl = opaque; - - trace_prep_io_800_writeb(addr - PPC_IO_BASE, val); - switch (addr) { - case 0x0092: - /* Special port 92 */ - /* Check soft reset asked */ - if (val & 0x01) { - qemu_irq_raise(sysctrl->reset_irq); - } else { - qemu_irq_lower(sysctrl->reset_irq); - } - /* Check LE mode */ - if (val & 0x02) { - sysctrl->endian = 1; - } else { - sysctrl->endian = 0; - } - break; - case 0x0800: - /* Motorola CPU configuration register : read-only */ - break; - case 0x0802: - /* Motorola base module feature register : read-only */ - break; - case 0x0803: - /* Motorola base module status register : read-only */ - break; - case 0x0808: - /* Hardfile light register */ - if (val & 1) - sysctrl->state |= STATE_HARDFILE; - else - sysctrl->state &= ~STATE_HARDFILE; - break; - case 0x0810: - /* Password protect 1 register */ - if (sysctrl->nvram != NULL) { - NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram); - (k->toggle_lock)(sysctrl->nvram, 1); - } - break; - case 0x0812: - /* Password protect 2 register */ - if (sysctrl->nvram != NULL) { - NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram); - (k->toggle_lock)(sysctrl->nvram, 2); - } - break; - case 0x0814: - /* L2 invalidate register */ - // tlb_flush(first_cpu, 1); - break; - case 0x081C: - /* system control register */ - sysctrl->syscontrol = val & 0x0F; - break; - case 0x0850: - /* I/O map type register */ - sysctrl->contiguous_map = val & 0x01; - qemu_set_irq(sysctrl->contiguous_map_irq, sysctrl->contiguous_map); - break; - default: - printf("ERROR: unaffected IO port write: %04" PRIx32 - " => %02" PRIx32"\n", addr, val); - break; - } -} - -static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr) -{ - sysctrl_t *sysctrl = opaque; - uint32_t retval = 0xFF; - - switch (addr) { - case 0x0092: - /* Special port 92 */ - retval = sysctrl->endian << 1; - break; - case 0x0800: - /* Motorola CPU configuration register */ - retval = 0xEF; /* MPC750 */ - break; - case 0x0802: - /* Motorola Base module feature register */ - retval = 0xAD; /* No ESCC, PMC slot neither ethernet */ - break; - case 0x0803: - /* Motorola base module status register */ - retval = 0xE0; /* Standard MPC750 */ - break; - case 0x080C: - /* Equipment present register: - * no L2 cache - * no upgrade processor - * no cards in PCI slots - * SCSI fuse is bad - */ - retval = 0x3C; - break; - case 0x0810: - /* Motorola base module extended feature register */ - retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */ - break; - case 0x0814: - /* L2 invalidate: don't care */ - break; - case 0x0818: - /* Keylock */ - retval = 0x00; - break; - case 0x081C: - /* system control register - * 7 - 6 / 1 - 0: L2 cache enable - */ - retval = sysctrl->syscontrol; - break; - case 0x0823: - /* */ - retval = 0x03; /* no L2 cache */ - break; - case 0x0850: - /* I/O map type register */ - retval = sysctrl->contiguous_map; - break; - default: - printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr); - break; - } - trace_prep_io_800_readb(addr - PPC_IO_BASE, retval); - - return retval; -} - - #define NVRAM_SIZE 0x2000 static void fw_cfg_boot_set(void *opaque, const char *boot_device, @@ -247,17 +78,6 @@ static void ppc_prep_reset(void *opaque) cpu_reset(CPU(cpu)); } -static const MemoryRegionPortio prep_portio_list[] = { - /* System control ports */ - { 0x0092, 1, 1, .read = PREP_io_800_readb, .write = PREP_io_800_writeb, }, - { 0x0800, 0x52, 1, - .read = PREP_io_800_readb, .write = PREP_io_800_writeb, }, - /* Special port to get debug messages from Open-Firmware */ - { 0x0F00, 4, 1, .write = PPC_debug_write, }, - PORTIO_END_OF_LIST(), -}; - -static PortioList prep_port_list; /*****************************************************************************/ /* NVRAM helpers */ @@ -397,207 +217,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, return 0; } -/* PowerPC PREP hardware initialisation */ -static void ppc_prep_init(MachineState *machine) -{ - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - const char *boot_device = machine->boot_order; - MemoryRegion *sysmem = get_system_memory(); - PowerPCCPU *cpu = NULL; - CPUPPCState *env = NULL; - Nvram *m48t59; -#if 0 - MemoryRegion *xcsr = g_new(MemoryRegion, 1); -#endif - int linux_boot, i, nb_nics1; - MemoryRegion *ram = g_new(MemoryRegion, 1); - uint32_t kernel_base, initrd_base; - long kernel_size, initrd_size; - DeviceState *dev; - PCIHostState *pcihost; - PCIBus *pci_bus; - PCIDevice *pci; - ISABus *isa_bus; - ISADevice *isa; - int ppc_boot_device; - DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; - - sysctrl = g_malloc0(sizeof(sysctrl_t)); - - linux_boot = (kernel_filename != NULL); - - /* init CPUs */ - for (i = 0; i < machine->smp.cpus; i++) { - cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - if (env->flags & POWERPC_FLAG_RTC_CLK) { - /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */ - cpu_ppc_tb_init(env, 7812500UL); - } else { - /* Set time-base frequency to 100 Mhz */ - cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); - } - qemu_register_reset(ppc_prep_reset, cpu); - } - - /* allocate RAM */ - memory_region_allocate_system_memory(ram, NULL, "ppc_prep.ram", ram_size); - memory_region_add_subregion(sysmem, 0, ram); - - if (linux_boot) { - kernel_base = KERNEL_LOAD_ADDR; - /* now we can load the kernel */ - kernel_size = load_image_targphys(kernel_filename, kernel_base, - ram_size - kernel_base); - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - /* load initrd */ - if (initrd_filename) { - initrd_base = INITRD_LOAD_ADDR; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - ram_size - initrd_base); - if (initrd_size < 0) { - error_report("could not load initial ram disk '%s'", - initrd_filename); - exit(1); - } - } else { - initrd_base = 0; - initrd_size = 0; - } - ppc_boot_device = 'm'; - } else { - kernel_base = 0; - kernel_size = 0; - initrd_base = 0; - initrd_size = 0; - ppc_boot_device = '\0'; - /* For now, OHW cannot boot from the network. */ - for (i = 0; boot_device[i] != '\0'; i++) { - if (boot_device[i] >= 'a' && boot_device[i] <= 'f') { - ppc_boot_device = boot_device[i]; - break; - } - } - if (ppc_boot_device == '\0') { - error_report("No valid boot device for Mac99 machine"); - exit(1); - } - } - - if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { - error_report("Only 6xx bus is supported on PREP machine"); - exit(1); - } - - dev = qdev_create(NULL, "raven-pcihost"); - if (bios_name == NULL) { - bios_name = BIOS_FILENAME; - } - qdev_prop_set_string(dev, "bios-name", bios_name); - qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); - qdev_prop_set_bit(dev, "is-legacy-prep", true); - pcihost = PCI_HOST_BRIDGE(dev); - object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL); - qdev_init_nofail(dev); - pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); - if (pci_bus == NULL) { - error_report("Couldn't create PCI host controller"); - exit(1); - } - sysctrl->contiguous_map_irq = qdev_get_gpio_in(dev, 0); - - /* PCI -> ISA bridge */ - pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378"); - cpu = POWERPC_CPU(first_cpu); - qdev_connect_gpio_out(&pci->qdev, 0, - cpu->env.irq_inputs[PPC6xx_INPUT_INT]); - sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9)); - sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11)); - sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9)); - sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11)); - isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci), "isa.0")); - - /* Super I/O (parallel + serial ports) */ - isa = isa_create(isa_bus, TYPE_PC87312_SUPERIO); - dev = DEVICE(isa); - qdev_prop_set_uint8(dev, "config", 13); /* fdc, ser0, ser1, par0 */ - qdev_init_nofail(dev); - - /* init basic PC hardware */ - pci_vga_init(pci_bus); - - nb_nics1 = nb_nics; - if (nb_nics1 > NE2000_NB_MAX) - nb_nics1 = NE2000_NB_MAX; - for(i = 0; i < nb_nics1; i++) { - if (nd_table[i].model == NULL) { - nd_table[i].model = g_strdup("ne2k_isa"); - } - if (strcmp(nd_table[i].model, "ne2k_isa") == 0) { - isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i], - &nd_table[i]); - } else { - pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); - } - } - - ide_drive_get(hd, ARRAY_SIZE(hd)); - for(i = 0; i < MAX_IDE_BUS; i++) { - isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i], - hd[2 * i], - hd[2 * i + 1]); - } - - cpu = POWERPC_CPU(first_cpu); - sysctrl->reset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET]; - - portio_list_init(&prep_port_list, NULL, prep_portio_list, sysctrl, "prep"); - portio_list_add(&prep_port_list, isa_address_space_io(isa), 0x0); - - /* - * PowerPC control and status register group: unimplemented, - * would be at address 0xFEFF0000. - */ - - if (machine_usb(machine)) { - pci_create_simple(pci_bus, -1, "pci-ohci"); - } - - m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 2000, 59); - if (m48t59 == NULL) - return; - sysctrl->nvram = m48t59; - - /* Initialise NVRAM */ - PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", ram_size, - ppc_boot_device, - kernel_base, kernel_size, - kernel_cmdline, - initrd_base, initrd_size, - /* XXX: need an option to load a NVRAM image */ - 0, - graphic_width, graphic_height, graphic_depth); -} - -static void prep_machine_init(MachineClass *mc) -{ - mc->deprecation_reason = "use 40p machine type instead"; - mc->desc = "PowerPC PREP platform"; - mc->init = ppc_prep_init; - mc->block_default_type = IF_IDE; - mc->max_cpus = MAX_CPUS; - mc->default_boot_order = "cad"; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("602"); - mc->default_display = "std"; -} - static int prep_set_cmos_checksum(DeviceState *dev, void *opaque) { uint16_t checksum = *(uint16_t *)opaque; @@ -821,4 +440,3 @@ static void ibm_40p_machine_init(MachineClass *mc) } DEFINE_MACHINE("40p", ibm_40p_machine_init) -DEFINE_MACHINE("prep", prep_machine_init) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a0076e5..c9b2e0a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -46,6 +46,7 @@ #include "migration/qemu-file-types.h" #include "migration/global_state.h" #include "migration/register.h" +#include "migration/blocker.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" #include "cpu-models.h" @@ -1677,6 +1678,14 @@ static void spapr_machine_reset(MachineState *machine) first_ppc_cpu->env.gpr[5] = 0; spapr->cas_reboot = false; + + spapr->mc_status = -1; + spapr->guest_machine_check_addr = -1; + + /* Signal all vCPUs waiting on this condition */ + qemu_cond_broadcast(&spapr->mc_delivery_cond); + + migrate_del_blocker(spapr->fwnmi_migration_blocker); } static void spapr_create_nvram(SpaprMachineState *spapr) @@ -1959,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = { }, }; +static bool spapr_fwnmi_needed(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + return spapr->guest_machine_check_addr != -1; +} + +static int spapr_fwnmi_pre_save(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + /* + * Check if machine check handling is in progress and print a + * warning message. + */ + if (spapr->mc_status != -1) { + warn_report("A machine check is being handled during migration. The" + "handler may run and log hardware error on the destination"); + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_machine_check = { + .name = "spapr_machine_check", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_fwnmi_needed, + .pre_save = spapr_fwnmi_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState), + VMSTATE_INT32(mc_status, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr = { .name = "spapr", .version_id = 3, @@ -1992,6 +2037,8 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_dtb, &vmstate_spapr_cap_large_decr, &vmstate_spapr_cap_ccf_assist, + &vmstate_spapr_cap_fwnmi, + &vmstate_spapr_machine_check, NULL } }; @@ -2807,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine) spapr_create_lmb_dr_connectors(spapr); } + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) { + /* Create the error string for live migration blocker */ + error_setg(&spapr->fwnmi_migration_blocker, + "A machine check is being handled during migration. The handler" + "may run and log hardware error on the destination"); + } + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); @@ -2970,6 +3024,8 @@ static void spapr_machine_init(MachineState *machine) kvmppc_spapr_enable_inkernel_multitce(); } + + qemu_cond_init(&spapr->mc_delivery_cond); } static int spapr_kvm_type(MachineState *machine, const char *vm_type) @@ -4397,7 +4453,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; - smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON; spapr_caps_add_properties(smc, &error_abort); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; @@ -4465,8 +4522,12 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true); */ static void spapr_machine_4_2_class_options(MachineClass *mc) { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_5_0_class_options(mc); compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF; } DEFINE_SPAPR_MACHINE(4_2, "4.2", false); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 481dfd2..8b27d3a 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -485,17 +485,48 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); if (tcg_enabled() && val) { - /* TODO - for now only allow broken for TCG */ - error_setg(errp, -"Requested count cache flush assist capability level not supported by tcg," - " try appending -machine cap-ccf-assist=off"); + /* TCG doesn't implement anything here, but allow with a warning */ + warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); } else if (kvm_enabled() && (val > kvm_val)) { + uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); + + if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { + /* + * If we don't have CCF assist on the host, the assist + * instruction is a harmless no-op. It won't correctly + * implement the cache count flush *but* if we have + * count-cache-disabled in the host, that flush is + * unnnecessary. So, specifically allow this case. This + * allows us to have better performance on POWER9 DD2.3, + * while still working on POWER9 DD2.2 and POWER8 host + * cpus. + */ + return; + } error_setg(errp, "Requested count cache flush assist capability level not supported by kvm," " try appending -machine cap-ccf-assist=off"); } } +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + if (!val) { + return; /* Disabled by default */ + } + + if (tcg_enabled()) { + warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not " + "supported in TCG"); + } else if (kvm_enabled()) { + if (kvmppc_set_fwnmi() < 0) { + error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) " + "not supported by KVM"); + } + } +} + SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { [SPAPR_CAP_HTM] = { .name = "htm", @@ -595,6 +626,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_ccf_assist_apply, }, + [SPAPR_CAP_FWNMI_MCE] = { + .name = "fwnmi-mce", + .description = "Handle fwnmi machine check exceptions", + .index = SPAPR_CAP_FWNMI_MCE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_fwnmi_mce_apply, + }, }; static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, @@ -734,6 +774,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index e355e00..884e455 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -40,8 +40,10 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/help_option.h" #include "qemu/bcd.h" +#include "qemu/main-loop.h" #include "hw/ppc/spapr_ovec.h" #include <libfdt.h> +#include "migration/blocker.h" #define RTAS_LOG_VERSION_MASK 0xff000000 #define RTAS_LOG_VERSION_6 0x06000000 @@ -213,6 +215,104 @@ struct hp_extended_log { struct rtas_event_log_v6_hp hp; } QEMU_PACKED; +struct rtas_event_log_v6_mc { +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ + struct rtas_event_log_v6_section_header hdr; + uint32_t fru_id; + uint32_t proc_id; + uint8_t error_type; +#define RTAS_LOG_V6_MC_TYPE_UE 0 +#define RTAS_LOG_V6_MC_TYPE_SLB 1 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 +#define RTAS_LOG_V6_MC_TYPE_TLB 4 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 + uint8_t sub_err_type; +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 +#define RTAS_LOG_V6_MC_UE_IFETCH 1 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 +#define RTAS_LOG_V6_MC_SLB_PARITY 0 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 +#define RTAS_LOG_V6_MC_TLB_PARITY 1 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 + uint8_t reserved_1[6]; + uint64_t effective_address; + uint64_t logical_address; +} QEMU_PACKED; + +struct mc_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_mc mc; +} QEMU_PACKED; + +struct MC_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_ierror_table mc_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +struct MC_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_derror_table mc_derror_table[] = { +{ 0x00008000, false, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00004000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000800, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000400, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000080, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000100, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + typedef enum EventClass { EVENT_CLASS_INTERNAL_ERRORS = 0, EVENT_CLASS_EPOW = 1, @@ -622,6 +722,175 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, + struct mc_extended_log *ext_elog) +{ + int i; + CPUPPCState *env = &cpu->env; + uint32_t summary; + uint64_t dsisr = env->spr[SPR_DSISR]; + + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; + if (recovered) { + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; + } else { + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; + } + + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { + if (!(dsisr & mc_derror_table[i].dsisr_value)) { + continue; + } + + ext_elog->mc.error_type = mc_derror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; + if (mc_derror_table[i].dar_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); + } + + summary |= mc_derror_table[i].initiator + | mc_derror_table[i].severity; + + return summary; + } + } else { + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != + mc_ierror_table[i].srr1_value) { + continue; + } + + ext_elog->mc.error_type = mc_ierror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; + if (mc_ierror_table[i].nip_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->nip); + } + + summary |= mc_ierror_table[i].initiator + | mc_ierror_table[i].severity; + + return summary; + } + } + + summary |= RTAS_LOG_INITIATOR_CPU; + return summary; +} + +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + uint64_t rtas_addr; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + target_ulong msr = 0; + struct rtas_error_log log; + struct mc_extended_log *ext_elog; + uint32_t summary; + + /* + * Properly set bits in MSR before we invoke the handler. + * SRR0/1, DAR and DSISR are properly set by KVM + */ + if (!(*pcc->interrupts_big_endian)(cpu)) { + msr |= (1ULL << MSR_LE); + } + + if (env->msr & (1ULL << MSR_SF)) { + msr |= (1ULL << MSR_SF); + } + + msr |= (1ULL << MSR_ME); + + ext_elog = g_malloc0(sizeof(*ext_elog)); + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); + + log.summary = cpu_to_be32(summary); + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); + + spapr_init_v6hdr(&ext_elog->v6hdr); + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); + ext_elog->mc.hdr.section_length = + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); + ext_elog->mc.hdr.section_version = 1; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + /* Unable to fetch rtas_addr. Hence reset the guest */ + ppc_cpu_do_system_reset(cs); + g_free(ext_elog); + return; + } + + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, + env->gpr[3]); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]), &log, sizeof(log)); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]) + sizeof(log), ext_elog, + sizeof(*ext_elog)); + + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; + env->msr = msr; + env->nip = spapr->guest_machine_check_addr; + + g_free(ext_elog); +} + +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + int ret; + Error *local_err = NULL; + + if (spapr->guest_machine_check_addr == -1) { + /* + * This implies that we have hit a machine check either when the + * guest has not registered FWNMI (i.e., "ibm,nmi-register" not + * called) or between system reset and "ibm,nmi-register". + * Fall back to the old machine check behavior in such cases. + */ + cs->exception_index = POWERPC_EXCP_MCHECK; + ppc_cpu_do_interrupt(cs); + return; + } + + while (spapr->mc_status != -1) { + /* + * Check whether the same CPU got machine check error + * while still handling the mc error (i.e., before + * that CPU called "ibm,nmi-interlock") + */ + if (spapr->mc_status == cpu->vcpu_id) { + qemu_system_guest_panicked(NULL); + return; + } + qemu_cond_wait_iothread(&spapr->mc_delivery_cond); + /* Meanwhile if the system is reset, then just return */ + if (spapr->guest_machine_check_addr == -1) { + return; + } + } + + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); + if (ret == -EBUSY) { + /* + * We don't want to abort so we let the migration to continue. + * In a rare case, the machine check handler will run on the target. + * Though this is not preferable, it is better than aborting + * the migration or killing the VM. + */ + warn_report("Received a fwnmi while migration was in progress"); + } + + spapr->mc_status = cpu->vcpu_id; + spapr_mce_dispatch_elog(cpu, recovered); +} + static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f1799b1..b8bb66b 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1676,6 +1676,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, Error *local_err = NULL; bool raw_mode_supported = false; bool guest_xive; + CPUState *cs; + + /* CAS is supposed to be called early when only the boot vCPU is active. */ + CPU_FOREACH(cs) { + if (cs == CPU(cpu)) { + continue; + } + if (!cs->halted) { + warn_report("guest has multiple active vCPUs at CAS, which is not allowed"); + return H_MULTI_THREADS_ACTIVE; + } + } cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err); if (local_err) { @@ -1703,7 +1715,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); + if (!ov1_guest) { + warn_report("guest didn't provide option vector 1"); + return H_PARAMETER; + } ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + if (!ov5_guest) { + warn_report("guest didn't provide option vector 5"); + return H_PARAMETER; + } if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) { error_report("guest requested hash and radix MMU, which is invalid."); exit(EXIT_FAILURE); diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 85135e0..883fe28 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -50,6 +50,7 @@ #include "hw/ppc/fdt.h" #include "target/ppc/mmu-hash64.h" #include "target/ppc/mmu-book3s-v3.h" +#include "migration/blocker.h" static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -399,6 +400,62 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, rtas_st(rets, 1, 100); } +static void rtas_ibm_nmi_register(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + hwaddr rtas_addr; + + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + spapr->guest_machine_check_addr = rtas_ld(args, 1); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + if (spapr->guest_machine_check_addr == -1) { + /* NMI register not called */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (spapr->mc_status != cpu->vcpu_id) { + /* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling, + * hence unset mc_status. + */ + spapr->mc_status = -1; + qemu_cond_signal(&spapr->mc_delivery_cond); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -476,6 +533,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) } } +hwaddr spapr_get_rtas_addr(void) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int rtas_node; + const fdt32_t *rtas_data; + void *fdt = spapr->fdt_blob; + + /* fetch rtas addr from fdt */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + if (rtas_node < 0) { + return 0; + } + + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); + if (!rtas_data) { + return 0; + } + + /* + * We assume that the OS called RTAS instantiate-rtas, but some other + * OS might call RTAS instantiate-rtas-64 instead. This fine as of now + * as SLOF only supports 32-bit variant. + */ + return (hwaddr)fdt32_to_cpu(*rtas_data); +} + static void core_rtas_register_types(void) { spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", @@ -501,6 +584,10 @@ static void core_rtas_register_types(void) rtas_set_power_level); spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", rtas_get_power_level); + spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register", + rtas_ibm_nmi_register); + spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock", + rtas_ibm_nmi_interlock); } type_init(core_rtas_register_types) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index f14944e..0b085ea 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -87,6 +87,7 @@ static int vio_make_devnode(SpaprVioDevice *dev, SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); int vdevice_off, node_off, ret; char *dt_name; + const char *dt_compatible; vdevice_off = fdt_path_offset(fdt, "/vdevice"); if (vdevice_off < 0) { @@ -113,9 +114,15 @@ static int vio_make_devnode(SpaprVioDevice *dev, } } - if (pc->dt_compatible) { + if (pc->get_dt_compatible) { + dt_compatible = pc->get_dt_compatible(dev); + } else { + dt_compatible = pc->dt_compatible; + } + + if (dt_compatible) { ret = fdt_setprop_string(fdt, node_off, "compatible", - pc->dt_compatible); + dt_compatible); if (ret < 0) { return ret; } diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 7526947..91dd00e 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -89,10 +89,7 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env, tlb->PID = 0; } -static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size, - int do_init, - const char *cpu_type, - uint32_t sysclk) +static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk) { PowerPCCPU *cpu; CPUPPCState *env; @@ -213,7 +210,7 @@ static void virtex_init(MachineState *machine) int i; /* init CPUs */ - cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_type, 400000000); + cpu = ppc440_init_xilinx(machine->cpu_type, 400000000); env = &cpu->env; if (env->mmu_model != POWERPC_MMU_BOOKE) { diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig index 4c8ee87..9e67d99 100644 --- a/hw/tpm/Kconfig +++ b/hw/tpm/Kconfig @@ -22,3 +22,9 @@ config TPM_EMULATOR bool default y depends on TPMDEV + +config TPM_SPAPR + bool + default y + depends on TPM && PSERIES + select TPMDEV diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs index de0b85d..85eb99a 100644 --- a/hw/tpm/Makefile.objs +++ b/hw/tpm/Makefile.objs @@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o +obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c new file mode 100644 index 0000000..ce65eb2 --- /dev/null +++ b/hw/tpm/tpm_spapr.c @@ -0,0 +1,429 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Virtual TPM + * + * Copyright (c) 2015, 2017, 2019 IBM Corporation. + * + * Authors: + * Stefan Berger <stefanb@linux.vnet.ibm.com> + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" + +#include "sysemu/tpm_backend.h" +#include "tpm_int.h" +#include "tpm_util.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "trace.h" + +#define DEBUG_SPAPR 0 + +#define VIO_SPAPR_VTPM(obj) \ + OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR) + +typedef struct TpmCrq { + uint8_t valid; /* 0x80: cmd; 0xc0: init crq */ + /* 0x81-0x83: CRQ message response */ + uint8_t msg; /* see below */ + uint16_t len; /* len of TPM request; len of TPM response */ + uint32_t data; /* rtce_dma_handle when sending TPM request */ + uint64_t reserved; +} TpmCrq; + +#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0 +#define SPAPR_VTPM_VALID_COMMAND 0x80 +#define SPAPR_VTPM_MSG_RESULT 0x80 + +/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */ +#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1 +#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2 + +/* msg types for valid = SPAPR_VTPM_VALID_CMD */ +#define SPAPR_VTPM_GET_VERSION 0x1 +#define SPAPR_VTPM_TPM_COMMAND 0x2 +#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3 +#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4 + +/* response error messages */ +#define SPAPR_VTPM_VTPM_ERROR 0xff + +/* error codes */ +#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3 +#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4 + +#define TPM_SPAPR_BUFFER_MAX 4096 + +typedef struct { + SpaprVioDevice vdev; + + TpmCrq crq; /* track single TPM command */ + + uint8_t state; +#define SPAPR_VTPM_STATE_NONE 0 +#define SPAPR_VTPM_STATE_EXECUTION 1 +#define SPAPR_VTPM_STATE_COMPLETION 2 + + unsigned char *buffer; + + uint32_t numbytes; /* number of bytes to deliver on resume */ + + TPMBackendCmd cmd; + + TPMBackend *be_driver; + TPMVersion be_tpm_version; + + size_t be_buffer_size; +} SpaprTpmState; + +/* + * Send a request to the TPM. + */ +static void tpm_spapr_tpm_send(SpaprTpmState *s) +{ + if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); + } + + s->state = SPAPR_VTPM_STATE_EXECUTION; + s->cmd = (TPMBackendCmd) { + .locty = 0, + .in = s->buffer, + .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size), + .out = s->buffer, + .out_len = s->be_buffer_size, + }; + + tpm_backend_deliver_request(s->be_driver, &s->cmd); +} + +static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr) +{ + long rc; + + /* a max. of be_buffer_size bytes can be transported */ + rc = spapr_vio_dma_read(&s->vdev, dataptr, + s->buffer, s->be_buffer_size); + if (rc) { + error_report("tpm_spapr_got_payload: DMA read failure"); + } + /* let vTPM handle any malformed request */ + tpm_spapr_tpm_send(s); + + return rc; +} + +static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq) +{ + return spapr_vio_send_crq(dev, (uint8_t *)crq); +} + +static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + TpmCrq local_crq; + TpmCrq *crq = &s->crq; /* requests only */ + int rc; + uint8_t valid = crq_data[0]; + uint8_t msg = crq_data[1]; + + trace_tpm_spapr_do_crq(valid, msg); + + switch (valid) { + case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */ + + /* Respond to initialization request */ + switch (msg) { + case SPAPR_VTPM_INIT_CRQ_RESULT: + trace_tpm_spapr_do_crq_crq_result(); + memset(&local_crq, 0, sizeof(local_crq)); + local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; + local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT: + trace_tpm_spapr_do_crq_crq_complete_result(); + memset(&local_crq, 0, sizeof(local_crq)); + local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; + local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + } + + break; + case SPAPR_VTPM_VALID_COMMAND: /* Payloads */ + switch (msg) { + case SPAPR_VTPM_TPM_COMMAND: + trace_tpm_spapr_do_crq_tpm_command(); + if (s->state == SPAPR_VTPM_STATE_EXECUTION) { + return H_BUSY; + } + memcpy(crq, crq_data, sizeof(*crq)); + + rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data)); + + if (rc == H_SUCCESS) { + crq->valid = be16_to_cpu(0); + } else { + local_crq.valid = SPAPR_VTPM_MSG_RESULT; + local_crq.msg = SPAPR_VTPM_VTPM_ERROR; + local_crq.len = cpu_to_be16(0); + local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED); + spapr_tpm_send_crq(dev, &local_crq); + } + break; + + case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE: + trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size); + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE | + SPAPR_VTPM_MSG_RESULT; + local_crq.len = cpu_to_be16(s->be_buffer_size); + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_GET_VERSION: + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT; + local_crq.len = cpu_to_be16(0); + switch (s->be_tpm_version) { + case TPM_VERSION_1_2: + local_crq.data = cpu_to_be32(1); + break; + case TPM_VERSION_2_0: + local_crq.data = cpu_to_be32(2); + break; + default: + g_assert_not_reached(); + break; + } + trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data)); + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_PREPARE_TO_SUSPEND: + trace_tpm_spapr_do_crq_prepare_to_suspend(); + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND | + SPAPR_VTPM_MSG_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + + default: + trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg); + } + break; + default: + trace_tpm_spapr_do_crq_unknown_crq(valid, msg); + }; + + return H_SUCCESS; +} + +static void tpm_spapr_request_completed(TPMIf *ti, int ret) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(ti); + TpmCrq *crq = &s->crq; + uint32_t len; + int rc; + + s->state = SPAPR_VTPM_STATE_COMPLETION; + + /* a max. of be_buffer_size bytes can be transported */ + len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); + + if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { + trace_tpm_spapr_caught_response(len); + /* defer delivery of response until .post_load */ + s->numbytes = len; + return; + } + + rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), + s->buffer, len); + + if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, len, "From TPM"); + } + + crq->valid = SPAPR_VTPM_MSG_RESULT; + if (rc == H_SUCCESS) { + crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT; + crq->len = cpu_to_be16(len); + } else { + error_report("%s: DMA write failure", __func__); + crq->msg = SPAPR_VTPM_VTPM_ERROR; + crq->len = cpu_to_be16(0); + crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED); + } + + rc = spapr_tpm_send_crq(&s->vdev, crq); + if (rc) { + error_report("%s: Error sending response", __func__); + } +} + +static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize) +{ + return tpm_backend_startup_tpm(s->be_driver, buffersize); +} + +static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + switch (s->be_tpm_version) { + case TPM_VERSION_1_2: + return "IBM,vtpm"; + case TPM_VERSION_2_0: + return "IBM,vtpm20"; + default: + g_assert_not_reached(); + } +} + +static void tpm_spapr_reset(SpaprVioDevice *dev) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + s->state = SPAPR_VTPM_STATE_NONE; + s->numbytes = 0; + + s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); + + s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), + TPM_SPAPR_BUFFER_MAX); + + tpm_backend_reset(s->be_driver); + tpm_spapr_do_startup_tpm(s, s->be_buffer_size); +} + +static enum TPMVersion tpm_spapr_get_version(TPMIf *ti) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(ti); + + if (tpm_backend_had_startup_error(s->be_driver)) { + return TPM_VERSION_UNSPEC; + } + + return tpm_backend_get_tpm_version(s->be_driver); +} + +/* persistent state handling */ + +static int tpm_spapr_pre_save(void *opaque) +{ + SpaprTpmState *s = opaque; + + tpm_backend_finish_sync(s->be_driver); + /* + * we cannot deliver the results to the VM since DMA would touch VM memory + */ + + return 0; +} + +static int tpm_spapr_post_load(void *opaque, int version_id) +{ + SpaprTpmState *s = opaque; + + if (s->numbytes) { + trace_tpm_spapr_post_load(); + /* deliver the results to the VM via DMA */ + tpm_spapr_request_completed(TPM_IF(s), 0); + s->numbytes = 0; + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_vtpm = { + .name = "tpm-spapr", + .pre_save = tpm_spapr_pre_save, + .post_load = tpm_spapr_post_load, + .fields = (VMStateField[]) { + VMSTATE_SPAPR_VIO(vdev, SpaprTpmState), + + VMSTATE_UINT8(state, SpaprTpmState), + VMSTATE_UINT32(numbytes, SpaprTpmState), + VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes), + /* remember DMA address */ + VMSTATE_UINT32(crq.data, SpaprTpmState), + VMSTATE_END_OF_LIST(), + } +}; + +static Property tpm_spapr_properties[] = { + DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev), + DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver), + DEFINE_PROP_END_OF_LIST(), +}; + +static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + if (!tpm_find()) { + error_setg(errp, "at most one TPM device is permitted"); + return; + } + + dev->crq.SendFunc = tpm_spapr_do_crq; + + if (!s->be_driver) { + error_setg(errp, "'tpmdev' property is required"); + return; + } + s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX); +} + +static void tpm_spapr_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); + TPMIfClass *tc = TPM_IF_CLASS(klass); + + k->realize = tpm_spapr_realizefn; + k->reset = tpm_spapr_reset; + k->dt_name = "vtpm"; + k->dt_type = "IBM,vtpm"; + k->get_dt_compatible = tpm_spapr_get_dt_compatible; + k->signal_mask = 0x00000001; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, tpm_spapr_properties); + k->rtce_window_size = 0x10000000; + dc->vmsd = &vmstate_spapr_vtpm; + + tc->model = TPM_MODEL_TPM_SPAPR; + tc->get_version = tpm_spapr_get_version; + tc->request_completed = tpm_spapr_request_completed; +} + +static const TypeInfo tpm_spapr_info = { + .name = TYPE_TPM_SPAPR, + .parent = TYPE_VIO_SPAPR_DEVICE, + .instance_size = sizeof(SpaprTpmState), + .class_init = tpm_spapr_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_TPM_IF }, + { } + } +}; + +static void tpm_spapr_register_types(void) +{ + type_register_static(&tpm_spapr_info); +} + +type_init(tpm_spapr_register_types) diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 5362df2..31facb8 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -107,30 +107,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr) return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); } -static void tpm_tis_show_buffer(const unsigned char *buffer, - size_t buffer_size, const char *string) -{ - size_t len, i; - char *line_buffer, *p; - - len = MIN(tpm_cmd_get_size(buffer), buffer_size); - - /* - * allocate enough room for 3 chars per buffer entry plus a - * newline after every 16 chars and a final null terminator. - */ - line_buffer = g_malloc(len * 3 + (len / 16) + 1); - - for (i = 0, p = line_buffer; i < len; i++) { - if (i && !(i % 16)) { - p += sprintf(p, "\n"); - } - p += sprintf(p, "%.2X ", buffer[i]); - } - trace_tpm_tis_show_buffer(string, len, line_buffer); - - g_free(line_buffer); -} /* * Set the given flags in the STS register by clearing the register but @@ -156,8 +132,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) */ static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) { - if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { - tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); + if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); } /* @@ -325,8 +301,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret) s->loc[locty].state = TPM_TIS_STATE_COMPLETION; s->rw_offset = 0; - if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { - tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); + if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); } if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c index 62b091f..c0a0f3d 100644 --- a/hw/tpm/tpm_util.c +++ b/hw/tpm/tpm_util.c @@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb) tsb->buffer = NULL; tsb->size = 0; } + +void tpm_util_show_buffer(const unsigned char *buffer, + size_t buffer_size, const char *string) +{ + size_t len, i; + char *line_buffer, *p; + + len = MIN(tpm_cmd_get_size(buffer), buffer_size); + + /* + * allocate enough room for 3 chars per buffer entry plus a + * newline after every 16 chars and a final null terminator. + */ + line_buffer = g_malloc(len * 3 + (len / 16) + 1); + + for (i = 0, p = line_buffer; i < len; i++) { + if (i && !(i % 16)) { + p += sprintf(p, "\n"); + } + p += sprintf(p, "%.2X ", buffer[i]); + } + trace_tpm_util_show_buffer(string, len, line_buffer); + + g_free(line_buffer); +} diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h index f397ac2..7889081 100644 --- a/hw/tpm/tpm_util.h +++ b/hw/tpm/tpm_util.h @@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer { void tpm_sized_buffer_reset(TPMSizedBuffer *tsb); +void tpm_util_show_buffer(const unsigned char *buffer, + size_t buffer_size, const char *string); + #endif /* TPM_TPM_UTIL_H */ diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events index 89804bc..439e514 100644 --- a/hw/tpm/trace-events +++ b/hw/tpm/trace-events @@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u, tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu" tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu" tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu" +tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" # tpm_emulator.c tpm_emulator_set_locality(uint8_t locty) "setting locality to %d" @@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) "" tpm_emulator_inst_init(void) "" # tpm_tis.c -tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s" tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x" tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d" tpm_tis_abort(uint8_t locty) "New active locality is %d" @@ -55,3 +55,17 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u" # tpm_ppi.c tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu" + +# hw/tpm/tpm_spapr.c +tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" +tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x" +tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT" +tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT" +tpm_spapr_do_crq_tpm_command(void) "got TPM command payload" +tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu" +tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" +tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" +tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" +tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." +tpm_spapr_post_load(void) "Delivering TPM response after resume" +tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes" |