author     Peter Maydell <peter.maydell@linaro.org>   2014-06-16 18:26:21 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2014-06-16 18:26:21 +0100
commit     af44da87e926ff64260b95f4350d338c4fc113ca (patch)
tree       303a18d80e73641bb6e23218ac7b7df0666bcc6b /hw
parent     f27701510cdce9f76cdad0aaf9fb0bbcb23d299a (diff)
parent     9dbae97723e964692364fb43012c6fa5448a661f (diff)
download   qemu-af44da87e926ff64260b95f4350d338c4fc113ca.zip
           qemu-af44da87e926ff64260b95f4350d338c4fc113ca.tar.gz
           qemu-af44da87e926ff64260b95f4350d338c4fc113ca.tar.bz2
Merge remote-tracking branch 'remotes/agraf/tags/signed-ppc-for-upstream' into staging
Patch queue for ppc - 2014-06-16

This pull request brings a lot of fun things. Among others we have

 - e500: u-boot firmware support
 - sPAPR: magic page enablement
 - sPAPR: add "compat" CPU option to support older guests
 - sPAPR: refactorings in preparation for VFIO
 - POWER8 live migration
 - mac99: expose bus frequency
 - little endian core dump, gdb and disas support
 - new ppc64le-linux-user target
 - DFP emulation
 - bug fixes

# gpg: Signature made Mon 16 Jun 2014 12:28:32 BST using RSA key ID 03FEDC60
# gpg: Can't check signature: public key not found

* remotes/agraf/tags/signed-ppc-for-upstream: (156 commits)
  spapr_pci: Advertise MSI quota
  PPC: KVM: Make pv hcall endian agnostic
  powerpc: use float64 for frsqrte
  spapr: Add kvm-type property
  spapr: Create SPAPRMachine struct
  linux-user: Tell guest about big host page sizes
  spapr_hcall: Add address-translation-mode-on-interrupt resource in H_SET_MODE
  spapr_hcall: Split h_set_mode()
  target-ppc: Enable DABRX SPR and limit it to <=POWER7
  target-ppc: Enable PPR and VRSAVE SPRs migration
  target-ppc: Add POWER8's Event Based Branch (EBB) control SPRs
  KVM: target-ppc: Enable TM state migration
  target-ppc: Add POWER8's TM SPRs
  target-ppc: Add POWER8's MMCR2/MMCRS SPRs
  target-ppc: Enable FSCR facility check for TAR
  target-ppc: Add POWER8's FSCR SPR
  target-ppc: Add POWER8's TIR SPR
  target-ppc: Refactor class init for POWER7/8
  target-ppc: Switch POWER7/8 classes to use correct PMU SPRs
  target-ppc: Make use of gen_spr_power5p_lpar() for POWER7/8
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
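For orientation, the headline features above surface as new command-line knobs; a rough usage sketch follows. These invocations are illustrative assumptions drawn from the patch descriptions in this queue (the "compat" CPU option, the "kvm-type" machine property, the u-boot.e500 firmware default), not text from this page; image names and sizes are placeholders.

    # Cap a pseries guest's CPU at a POWER7 compatibility level so older
    # guests keep working (new "compat" CPU option; disk name is a placeholder):
    qemu-system-ppc64 -machine pseries -cpu host,compat=power7 -m 2G guest.qcow2

    # Pick the KVM flavour via the new "kvm-type" machine property
    # (accepted values depend on the host kernel; HV is shown as an example):
    qemu-system-ppc64 -machine pseries,accel=kvm,kvm-type=HV ...

    # Boot the e500 platform through U-Boot: with no -kernel the firmware
    # defaults to "u-boot.e500"; a -kernel given alongside -bios is handed to
    # U-Boot via the device tree rather than being entered directly:
    qemu-system-ppc -machine ppce500 -kernel vmlinux -bios u-boot.e500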
Diffstat (limited to 'hw')
-rw-r--r--  hw/ide/macio.c             |  71
-rw-r--r--  hw/intc/openpic.c          |  99
-rw-r--r--  hw/intc/openpic_kvm.c      |  15
-rw-r--r--  hw/misc/macio/mac_dbdma.c  |   6
-rw-r--r--  hw/misc/macio/macio.c      |   2
-rw-r--r--  hw/net/fsl_etsec/rings.c   |   2
-rw-r--r--  hw/nvram/spapr_nvram.c     |   2
-rw-r--r--  hw/pci-host/ppce500.c      |  41
-rw-r--r--  hw/ppc/e500.c              | 147
-rw-r--r--  hw/ppc/mac_newworld.c      |   5
-rw-r--r--  hw/ppc/mac_oldworld.c      |   5
-rw-r--r--  hw/ppc/ppc.c               |  79
-rw-r--r--  hw/ppc/spapr.c             | 234
-rw-r--r--  hw/ppc/spapr_hcall.c       | 233
-rw-r--r--  hw/ppc/spapr_iommu.c       | 181
-rw-r--r--  hw/ppc/spapr_pci.c         | 129
-rw-r--r--  hw/ppc/spapr_vio.c         |   6
17 files changed, 994 insertions, 263 deletions
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 1c20616..c14a1dd 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -193,6 +193,11 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
goto done;
}
+ if (--io->requests) {
+ /* More requests still in flight */
+ return;
+ }
+
if (!m->dma_active) {
MACIO_DPRINTF("waiting for data (%#x - %#x - %x)\n",
s->nsector, io->len, s->status);
@@ -212,6 +217,13 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
s->nsector -= n;
}
+ if (io->finish_remain_read) {
+ /* Finish a stale read from the last iteration */
+ io->finish_remain_read = false;
+ cpu_physical_memory_write(io->finish_addr, io->remainder,
+ io->finish_len);
+ }
+
MACIO_DPRINTF("remainder: %d io->len: %d nsector: %d "
"sector_num: %" PRId64 "\n",
io->remainder_len, io->len, s->nsector, sector_num);
@@ -229,7 +241,6 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
break;
case IDE_DMA_WRITE:
cpu_physical_memory_read(io->addr, p, remainder_len);
- bdrv_write(s->bs, sector_num - 1, io->remainder, 1);
break;
case IDE_DMA_TRIM:
break;
@@ -237,6 +248,15 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
io->addr += remainder_len;
io->len -= remainder_len;
io->remainder_len -= remainder_len;
+
+ if (s->dma_cmd == IDE_DMA_WRITE && !io->remainder_len) {
+ io->requests++;
+ qemu_iovec_reset(&io->iov);
+ qemu_iovec_add(&io->iov, io->remainder, 0x200);
+
+ m->aiocb = bdrv_aio_writev(s->bs, sector_num - 1, &io->iov, 1,
+ pmac_ide_transfer_cb, io);
+ }
}
if (s->nsector == 0 && !io->remainder_len) {
@@ -267,20 +287,25 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
switch (s->dma_cmd) {
case IDE_DMA_READ:
- bdrv_read(s->bs, sector_num + nsector, io->remainder, 1);
- cpu_physical_memory_write(io->addr + io->len - unaligned,
- io->remainder, unaligned);
+ io->requests++;
+ io->finish_addr = io->addr + io->len - unaligned;
+ io->finish_len = unaligned;
+ io->finish_remain_read = true;
+ qemu_iovec_reset(&io->iov);
+ qemu_iovec_add(&io->iov, io->remainder, 0x200);
+
+ m->aiocb = bdrv_aio_readv(s->bs, sector_num + nsector, &io->iov, 1,
+ pmac_ide_transfer_cb, io);
break;
case IDE_DMA_WRITE:
/* cache the contents in our io struct */
cpu_physical_memory_read(io->addr + io->len - unaligned,
- io->remainder, unaligned);
+ io->remainder + io->remainder_len,
+ unaligned);
break;
case IDE_DMA_TRIM:
break;
}
-
- io->len -= unaligned;
}
MACIO_DPRINTF("io->len = %#x\n", io->len);
@@ -292,10 +317,12 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
io->remainder_len = (0x200 - unaligned) & 0x1ff;
MACIO_DPRINTF("set remainder to: %d\n", io->remainder_len);
- /* We would read no data from the block layer, thus not get a callback.
- Just fake completion manually. */
+ /* Only subsector reads happening */
if (!io->len) {
- pmac_ide_transfer_cb(opaque, 0);
+ if (!io->requests) {
+ io->requests++;
+ pmac_ide_transfer_cb(opaque, ret);
+ }
return;
}
@@ -319,6 +346,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
DMA_DIRECTION_TO_DEVICE);
break;
}
+
+ io->requests++;
return;
done:
@@ -337,6 +366,27 @@ static void pmac_ide_transfer(DBDMA_io *io)
s->io_buffer_size = 0;
if (s->drive_kind == IDE_CD) {
+
+ /* Handle non-block ATAPI DMA transfers */
+ if (s->lba == -1) {
+ s->io_buffer_size = MIN(io->len, s->packet_transfer_size);
+ bdrv_acct_start(s->bs, &s->acct, s->io_buffer_size,
+ BDRV_ACCT_READ);
+ MACIO_DPRINTF("non-block ATAPI DMA transfer size: %d\n",
+ s->io_buffer_size);
+
+ /* Copy ATAPI buffer directly to RAM and finish */
+ cpu_physical_memory_write(io->addr, s->io_buffer,
+ s->io_buffer_size);
+ ide_atapi_cmd_ok(s);
+ m->dma_active = false;
+
+ MACIO_DPRINTF("end of non-block ATAPI DMA transfer\n");
+ bdrv_acct_done(s->bs, &s->acct);
+ io->dma_end(io);
+ return;
+ }
+
bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
pmac_ide_atapi_transfer_cb(io, 0);
return;
@@ -353,6 +403,7 @@ static void pmac_ide_transfer(DBDMA_io *io)
break;
}
+ io->requests++;
pmac_ide_transfer_cb(io, 0);
}
diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 08e0e19..028529e 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -192,6 +192,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
int idx);
static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
uint32_t val, int idx);
+static void openpic_reset(DeviceState *d);
typedef enum IRQType {
IRQ_TYPE_NORMAL = 0,
@@ -529,55 +530,6 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
}
}
-static void openpic_reset(DeviceState *d)
-{
- OpenPICState *opp = OPENPIC(d);
- int i;
-
- opp->gcr = GCR_RESET;
- /* Initialise controller registers */
- opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
- ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
- (opp->vid << FRR_VID_SHIFT);
-
- opp->pir = 0;
- opp->spve = -1 & opp->vector_mask;
- opp->tfrr = opp->tfrr_reset;
- /* Initialise IRQ sources */
- for (i = 0; i < opp->max_irq; i++) {
- opp->src[i].ivpr = opp->ivpr_reset;
- opp->src[i].idr = opp->idr_reset;
-
- switch (opp->src[i].type) {
- case IRQ_TYPE_NORMAL:
- opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
- break;
-
- case IRQ_TYPE_FSLINT:
- opp->src[i].ivpr |= IVPR_POLARITY_MASK;
- break;
-
- case IRQ_TYPE_FSLSPECIAL:
- break;
- }
- }
- /* Initialise IRQ destinations */
- for (i = 0; i < MAX_CPU; i++) {
- opp->dst[i].ctpr = 15;
- memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
- opp->dst[i].raised.next = -1;
- memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
- opp->dst[i].servicing.next = -1;
- }
- /* Initialise timers */
- for (i = 0; i < OPENPIC_MAX_TMR; i++) {
- opp->timers[i].tccr = 0;
- opp->timers[i].tbcr = TBCR_CI;
- }
- /* Go out of RESET state */
- opp->gcr = 0;
-}
-
static inline uint32_t read_IRQreg_idr(OpenPICState *opp, int n_IRQ)
{
return opp->src[n_IRQ].idr;
@@ -1461,6 +1413,55 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
return 0;
}
+static void openpic_reset(DeviceState *d)
+{
+ OpenPICState *opp = OPENPIC(d);
+ int i;
+
+ opp->gcr = GCR_RESET;
+ /* Initialise controller registers */
+ opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+ ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
+ (opp->vid << FRR_VID_SHIFT);
+
+ opp->pir = 0;
+ opp->spve = -1 & opp->vector_mask;
+ opp->tfrr = opp->tfrr_reset;
+ /* Initialise IRQ sources */
+ for (i = 0; i < opp->max_irq; i++) {
+ opp->src[i].ivpr = opp->ivpr_reset;
+ switch (opp->src[i].type) {
+ case IRQ_TYPE_NORMAL:
+ opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+ break;
+
+ case IRQ_TYPE_FSLINT:
+ opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+ break;
+
+ case IRQ_TYPE_FSLSPECIAL:
+ break;
+ }
+
+ write_IRQreg_idr(opp, i, opp->idr_reset);
+ }
+ /* Initialise IRQ destinations */
+ for (i = 0; i < MAX_CPU; i++) {
+ opp->dst[i].ctpr = 15;
+ memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
+ opp->dst[i].raised.next = -1;
+ memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
+ opp->dst[i].servicing.next = -1;
+ }
+ /* Initialise timers */
+ for (i = 0; i < OPENPIC_MAX_TMR; i++) {
+ opp->timers[i].tccr = 0;
+ opp->timers[i].tbcr = TBCR_CI;
+ }
+ /* Go out of RESET state */
+ opp->gcr = 0;
+}
+
typedef struct MemReg {
const char *name;
MemoryRegionOps const *ops;
diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c
index 585ab4f..e3bce04 100644
--- a/hw/intc/openpic_kvm.c
+++ b/hw/intc/openpic_kvm.c
@@ -31,6 +31,8 @@
#include "sysemu/kvm.h"
#include "qemu/log.h"
+#define GCR_RESET 0x80000000
+
#define KVM_OPENPIC(obj) \
OBJECT_CHECK(KVMOpenPICState, (obj), TYPE_KVM_OPENPIC)
@@ -50,11 +52,6 @@ static void kvm_openpic_set_irq(void *opaque, int n_IRQ, int level)
kvm_set_irq(kvm_state, n_IRQ, level);
}
-static void kvm_openpic_reset(DeviceState *d)
-{
- qemu_log_mask(LOG_UNIMP, "%s: unimplemented\n", __func__);
-}
-
static void kvm_openpic_write(void *opaque, hwaddr addr, uint64_t val,
unsigned size)
{
@@ -74,6 +71,14 @@ static void kvm_openpic_write(void *opaque, hwaddr addr, uint64_t val,
}
}
+static void kvm_openpic_reset(DeviceState *d)
+{
+ KVMOpenPICState *opp = KVM_OPENPIC(d);
+
+ /* Trigger the GCR.RESET bit to reset the PIC */
+ kvm_openpic_write(opp, 0x1020, GCR_RESET, sizeof(uint32_t));
+}
+
static uint64_t kvm_openpic_read(void *opaque, hwaddr addr, unsigned size)
{
KVMOpenPICState *opp = opaque;
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index 3335476..b25e851 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -748,9 +748,15 @@ static void dbdma_reset(void *opaque)
void* DBDMA_init (MemoryRegion **dbdma_mem)
{
DBDMAState *s;
+ int i;
s = g_malloc0(sizeof(DBDMAState));
+ for (i = 0; i < DBDMA_CHANNELS; i++) {
+ DBDMA_io *io = &s->channels[i].io;
+ qemu_iovec_init(&io->iov, 1);
+ }
+
memory_region_init_io(&s->mem, NULL, &dbdma_ops, s, "dbdma", 0x1000);
*dbdma_mem = &s->mem;
vmstate_register(NULL, -1, &vmstate_dbdma, s);
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 7f99aa0..47f45f5 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -259,7 +259,7 @@ static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size)
static const MemoryRegionOps timer_ops = {
.read = timer_read,
.write = timer_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
static int macio_newworld_initfn(PCIDevice *d)
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index e36cfbe..d4a494f 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -159,7 +159,7 @@ static void ievent_set(eTSEC *etsec,
if ((flags & IEVENT_RXB && etsec->regs[IMASK].value & IMASK_RXBEN)
|| (flags & IEVENT_RXF && etsec->regs[IMASK].value & IMASK_RXFEN)) {
- qemu_irq_pulse(etsec->rx_irq);
+ qemu_irq_raise(etsec->rx_irq);
RING_DEBUG("%s Raise Rx IRQ\n", __func__);
}
}
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 635713e..af49c46 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -42,7 +42,7 @@ typedef struct sPAPRNVRAM {
#define MIN_NVRAM_SIZE 8192
#define DEFAULT_NVRAM_SIZE 65536
-#define MAX_NVRAM_SIZE (UINT16_MAX * 16)
+#define MAX_NVRAM_SIZE 1048576
static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index e12d731..1b4c0f0 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -87,8 +87,10 @@ struct PPCE500PCIState {
struct pci_outbound pob[PPCE500_PCI_NR_POBS];
struct pci_inbound pib[PPCE500_PCI_NR_PIBS];
uint32_t gasket_time;
- qemu_irq irq[4];
+ qemu_irq irq[PCI_NUM_PINS];
+ uint32_t irq_num[PCI_NUM_PINS];
uint32_t first_slot;
+ uint32_t first_pin_irq;
/* mmio maps */
MemoryRegion container;
MemoryRegion iomem;
@@ -252,26 +254,39 @@ static const MemoryRegionOps e500_pci_reg_ops = {
.endianness = DEVICE_BIG_ENDIAN,
};
-static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
+static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int pin)
{
int devno = pci_dev->devfn >> 3;
int ret;
- ret = ppce500_pci_map_irq_slot(devno, irq_num);
+ ret = ppce500_pci_map_irq_slot(devno, pin);
pci_debug("%s: devfn %x irq %d -> %d devno:%x\n", __func__,
- pci_dev->devfn, irq_num, ret, devno);
+ pci_dev->devfn, pin, ret, devno);
return ret;
}
-static void mpc85xx_pci_set_irq(void *opaque, int irq_num, int level)
+static void mpc85xx_pci_set_irq(void *opaque, int pin, int level)
{
- qemu_irq *pic = opaque;
+ PPCE500PCIState *s = opaque;
+ qemu_irq *pic = s->irq;
- pci_debug("%s: PCI irq %d, level:%d\n", __func__, irq_num, level);
+ pci_debug("%s: PCI irq %d, level:%d\n", __func__, pin , level);
- qemu_set_irq(pic[irq_num], level);
+ qemu_set_irq(pic[pin], level);
+}
+
+static PCIINTxRoute e500_route_intx_pin_to_irq(void *opaque, int pin)
+{
+ PCIINTxRoute route;
+ PPCE500PCIState *s = opaque;
+
+ route.mode = PCI_INTX_ENABLED;
+ route.irq = s->irq_num[pin];
+
+ pci_debug("%s: PCI irq-pin = %d, irq_num= %d\n", __func__, pin, route.irq);
+ return route;
}
static const VMStateDescription vmstate_pci_outbound = {
@@ -308,7 +323,7 @@ static const VMStateDescription vmstate_ppce500_pci = {
VMSTATE_STRUCT_ARRAY(pob, PPCE500PCIState, PPCE500_PCI_NR_POBS, 1,
vmstate_pci_outbound, struct pci_outbound),
VMSTATE_STRUCT_ARRAY(pib, PPCE500PCIState, PPCE500_PCI_NR_PIBS, 1,
- vmstate_pci_outbound, struct pci_inbound),
+ vmstate_pci_inbound, struct pci_inbound),
VMSTATE_UINT32(gasket_time, PPCE500PCIState),
VMSTATE_END_OF_LIST()
}
@@ -349,10 +364,14 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
sysbus_init_irq(dev, &s->irq[i]);
}
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ s->irq_num[i] = s->first_pin_irq + i;
+ }
+
memory_region_init(&s->pio, OBJECT(s), "pci-pio", PCIE500_PCI_IOLEN);
b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
- mpc85xx_pci_map_irq, s->irq, address_space_mem,
+ mpc85xx_pci_map_irq, s, address_space_mem,
&s->pio, PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
h->bus = b;
@@ -370,6 +389,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
memory_region_add_subregion(&s->container, PCIE500_REG_BASE, &s->iomem);
sysbus_init_mmio(dev, &s->container);
sysbus_init_mmio(dev, &s->pio);
+ pci_bus_set_route_irq_fn(b, e500_route_intx_pin_to_irq);
return 0;
}
@@ -400,6 +420,7 @@ static const TypeInfo e500_host_bridge_info = {
static Property pcihost_properties[] = {
DEFINE_PROP_UINT32("first_slot", PPCE500PCIState, first_slot, 0x11),
+ DEFINE_PROP_UINT32("first_pin_irq", PPCE500PCIState, first_pin_irq, 0x1),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 223bab9..a973c18 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -39,7 +39,6 @@
#define EPAPR_MAGIC (0x45504150)
#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb"
-#define UIMAGE_LOAD_BASE 0
#define DTC_LOAD_PAD 0x1800000
#define DTC_PAD_MASK 0xFFFFF
#define DTB_MAX_SIZE (8 * 1024 * 1024)
@@ -128,6 +127,8 @@ static int ppce500_load_device_tree(MachineState *machine,
hwaddr addr,
hwaddr initrd_base,
hwaddr initrd_size,
+ hwaddr kernel_base,
+ hwaddr kernel_size,
bool dry_run)
{
CPUPPCState *env = first_cpu->env_ptr;
@@ -204,6 +205,13 @@ static int ppce500_load_device_tree(MachineState *machine,
if (ret < 0) {
fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n");
}
+
+ }
+
+ if (kernel_base != -1ULL) {
+ qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel",
+ kernel_base >> 32, kernel_base,
+ kernel_size >> 32, kernel_size);
}
ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
@@ -392,20 +400,25 @@ typedef struct DeviceTreeParams {
hwaddr addr;
hwaddr initrd_base;
hwaddr initrd_size;
+ hwaddr kernel_base;
+ hwaddr kernel_size;
} DeviceTreeParams;
static void ppce500_reset_device_tree(void *opaque)
{
DeviceTreeParams *p = opaque;
ppce500_load_device_tree(p->machine, &p->params, p->addr, p->initrd_base,
- p->initrd_size, false);
+ p->initrd_size, p->kernel_base, p->kernel_size,
+ false);
}
static int ppce500_prep_device_tree(MachineState *machine,
PPCE500Params *params,
hwaddr addr,
hwaddr initrd_base,
- hwaddr initrd_size)
+ hwaddr initrd_size,
+ hwaddr kernel_base,
+ hwaddr kernel_size)
{
DeviceTreeParams *p = g_new(DeviceTreeParams, 1);
p->machine = machine;
@@ -413,12 +426,15 @@ static int ppce500_prep_device_tree(MachineState *machine,
p->addr = addr;
p->initrd_base = initrd_base;
p->initrd_size = initrd_size;
+ p->kernel_base = kernel_base;
+ p->kernel_size = kernel_size;
qemu_register_reset(ppce500_reset_device_tree, p);
/* Issue the device tree loader once, so that we get the size of the blob */
return ppce500_load_device_tree(machine, params, addr, initrd_base,
- initrd_size, true);
+ initrd_size, kernel_base, kernel_size,
+ true);
}
/* Create -kernel TLB entries for BookE. */
@@ -603,17 +619,22 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
MemoryRegion *ram = g_new(MemoryRegion, 1);
PCIBus *pci_bus;
CPUPPCState *env = NULL;
- uint64_t elf_entry;
- uint64_t elf_lowaddr;
- hwaddr entry=0;
- hwaddr loadaddr=UIMAGE_LOAD_BASE;
- target_long kernel_size=0;
- target_ulong dt_base = 0;
- target_ulong initrd_base = 0;
- target_long initrd_size = 0;
- target_ulong cur_base = 0;
+ uint64_t loadaddr;
+ hwaddr kernel_base = -1LL;
+ int kernel_size = 0;
+ hwaddr dt_base = 0;
+ hwaddr initrd_base = 0;
+ int initrd_size = 0;
+ hwaddr cur_base = 0;
+ char *filename;
+ hwaddr bios_entry = 0;
+ target_long bios_size;
+ struct boot_info *boot_info;
+ int dt_size;
int i;
- unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
+ /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
+ * 4 respectively */
+ unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
qemu_irq **irqs, *mpic;
DeviceState *dev;
CPUPPCState *firstenv = NULL;
@@ -713,12 +734,13 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
/* PCI */
dev = qdev_create(NULL, "e500-pcihost");
qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot);
+ qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]);
qdev_init_nofail(dev);
s = SYS_BUS_DEVICE(dev);
- sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]);
- sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]);
- sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]);
- sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]);
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ sysbus_connect_irq(s, i, mpic[pci_irq_nrs[i]]);
+ }
+
memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET,
sysbus_mmio_get_region(s, 0));
@@ -738,29 +760,24 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
/* Register spinning region */
sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
+ if (cur_base < (32 * 1024 * 1024)) {
+ /* u-boot occupies memory up to 32MB, so load blobs above */
+ cur_base = (32 * 1024 * 1024);
+ }
+
/* Load kernel. */
if (machine->kernel_filename) {
- kernel_size = load_uimage(machine->kernel_filename, &entry,
- &loadaddr, NULL);
- if (kernel_size < 0) {
- kernel_size = load_elf(machine->kernel_filename, NULL, NULL,
- &elf_entry, &elf_lowaddr, NULL, 1,
- ELF_MACHINE, 0);
- entry = elf_entry;
- loadaddr = elf_lowaddr;
- }
- /* XXX try again as binary */
+ kernel_base = cur_base;
+ kernel_size = load_image_targphys(machine->kernel_filename,
+ cur_base,
+ ram_size - cur_base);
if (kernel_size < 0) {
fprintf(stderr, "qemu: could not load kernel '%s'\n",
machine->kernel_filename);
exit(1);
}
- cur_base = loadaddr + kernel_size;
-
- /* Reserve space for dtb */
- dt_base = (cur_base + DTC_LOAD_PAD) & ~DTC_PAD_MASK;
- cur_base += DTB_MAX_SIZE;
+ cur_base += kernel_size;
}
/* Load initrd. */
@@ -778,24 +795,60 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
cur_base = initrd_base + initrd_size;
}
- /* If we're loading a kernel directly, we must load the device tree too. */
- if (machine->kernel_filename) {
- struct boot_info *boot_info;
- int dt_size;
-
- dt_size = ppce500_prep_device_tree(machine, params, dt_base,
- initrd_base, initrd_size);
- if (dt_size < 0) {
- fprintf(stderr, "couldn't load device tree\n");
+ /*
+ * Smart firmware defaults ahead!
+ *
+ * We follow the following table to select which payload we execute.
+ *
+ * -kernel | -bios | payload
+ * ---------+-------+---------
+ * N | Y | u-boot
+ * N | N | u-boot
+ * Y | Y | u-boot
+ * Y | N | kernel
+ *
+ * This ensures backwards compatibility with how we used to expose
+ * -kernel to users but allows them to run through u-boot as well.
+ */
+ if (bios_name == NULL) {
+ if (machine->kernel_filename) {
+ bios_name = machine->kernel_filename;
+ } else {
+ bios_name = "u-boot.e500";
+ }
+ }
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+
+ bios_size = load_elf(filename, NULL, NULL, &bios_entry, &loadaddr, NULL,
+ 1, ELF_MACHINE, 0);
+ if (bios_size < 0) {
+ /*
+ * Hrm. No ELF image? Try a uImage, maybe someone is giving us an
+ * ePAPR compliant kernel
+ */
+ kernel_size = load_uimage(filename, &bios_entry, &loadaddr, NULL);
+ if (kernel_size < 0) {
+ fprintf(stderr, "qemu: could not load firmware '%s'\n", filename);
exit(1);
}
- assert(dt_size < DTB_MAX_SIZE);
+ }
- boot_info = env->load_info;
- boot_info->entry = entry;
- boot_info->dt_base = dt_base;
- boot_info->dt_size = dt_size;
+ /* Reserve space for dtb */
+ dt_base = (loadaddr + bios_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK;
+
+ dt_size = ppce500_prep_device_tree(machine, params, dt_base,
+ initrd_base, initrd_size,
+ kernel_base, kernel_size);
+ if (dt_size < 0) {
+ fprintf(stderr, "couldn't load device tree\n");
+ exit(1);
}
+ assert(dt_size < DTB_MAX_SIZE);
+
+ boot_info = env->load_info;
+ boot_info->entry = bios_entry;
+ boot_info->dt_base = dt_base;
+ boot_info->dt_size = dt_size;
if (kvm_enabled()) {
kvmppc_init();
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 4bdaa8d..e493dc1 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -72,6 +72,8 @@
#define MAX_IDE_BUS 2
#define CFG_ADDR 0xf0000510
#define TBFREQ (100UL * 1000UL * 1000UL)
+#define CLOCKFREQ (266UL * 1000UL * 1000UL)
+#define BUSFREQ (100UL * 1000UL * 1000UL)
/* debug UniNorth */
//#define DEBUG_UNIN
@@ -467,7 +469,8 @@ static void ppc_core99_init(MachineState *machine)
fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
}
/* Mac OS X requires a "known good" clock-frequency value; pass it one. */
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
}
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 77598e4..4b5e905 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -46,6 +46,8 @@
#define MAX_IDE_BUS 2
#define CFG_ADDR 0xf0000510
#define TBFREQ 16600000UL
+#define CLOCKFREQ 266000000UL
+#define BUSFREQ 66000000UL
static int fw_cfg_boot_set(void *opaque, const char *boot_device)
{
@@ -337,7 +339,8 @@ static void ppc_heathrow_init(MachineState *machine)
fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
}
/* Mac OS X requires a "known good" clock-frequency value; pass it one. */
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
}
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 71df471..bec82cd 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -29,9 +29,11 @@
#include "sysemu/cpus.h"
#include "hw/timer/m48t59.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
+#include "trace.h"
//#define PPC_DEBUG_IRQ
//#define PPC_DEBUG_TB
@@ -49,6 +51,8 @@
# define LOG_TB(...) do { } while (0)
#endif
+#define NSEC_PER_SEC 1000000000LL
+
static void cpu_ppc_tb_stop (CPUPPCState *env);
static void cpu_ppc_tb_start (CPUPPCState *env);
@@ -829,6 +833,81 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
}
+static void timebase_pre_save(void *opaque)
+{
+ PPCTimebase *tb = opaque;
+ uint64_t ticks = cpu_get_real_ticks();
+ PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
+ if (!first_ppc_cpu->env.tb_env) {
+ error_report("No timebase object");
+ return;
+ }
+
+ tb->time_of_the_day_ns = get_clock_realtime();
+ /*
+ * tb_offset is only expected to be changed by migration so
+ * there is no need to update it from KVM here
+ */
+ tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset;
+}
+
+static int timebase_post_load(void *opaque, int version_id)
+{
+ PPCTimebase *tb_remote = opaque;
+ CPUState *cpu;
+ PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+ int64_t tb_off_adj, tb_off, ns_diff;
+ int64_t migration_duration_ns, migration_duration_tb, guest_tb, host_ns;
+ unsigned long freq;
+
+ if (!first_ppc_cpu->env.tb_env) {
+ error_report("No timebase object");
+ return -1;
+ }
+
+ freq = first_ppc_cpu->env.tb_env->tb_freq;
+ /*
+ * Calculate timebase on the destination side of migration.
+ * The destination timebase must be not less than the source timebase.
+ * We try to adjust timebase by downtime if host clocks are not
+ * too much out of sync (1 second for now).
+ */
+ host_ns = get_clock_realtime();
+ ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns);
+ migration_duration_ns = MIN(NSEC_PER_SEC, ns_diff);
+ migration_duration_tb = muldiv64(migration_duration_ns, freq, NSEC_PER_SEC);
+ guest_tb = tb_remote->guest_timebase + MIN(0, migration_duration_tb);
+
+ tb_off_adj = guest_tb - cpu_get_real_ticks();
+
+ tb_off = first_ppc_cpu->env.tb_env->tb_offset;
+ trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off,
+ (tb_off_adj - tb_off) / freq);
+
+ /* Set new offset to all CPUs */
+ CPU_FOREACH(cpu) {
+ PowerPCCPU *pcpu = POWERPC_CPU(cpu);
+ pcpu->env.tb_env->tb_offset = tb_off_adj;
+ }
+
+ return 0;
+}
+
+const VMStateDescription vmstate_ppc_timebase = {
+ .name = "timebase",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .pre_save = timebase_pre_save,
+ .post_load = timebase_post_load,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT64(guest_timebase, PPCTimebase),
+ VMSTATE_INT64(time_of_the_day_ns, PPCTimebase),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
/* Set up (once) timebase frequency (in Hz) */
clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
{
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index adac5ff..e06321c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -34,6 +34,7 @@
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "mmu-hash64.h"
+#include "qom/cpu.h"
#include "hw/boards.h"
#include "hw/ppc/ppc.h"
@@ -53,6 +54,7 @@
#include "hw/usb.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "trace.h"
#include <libfdt.h>
@@ -78,13 +80,28 @@
#define TIMEBASE_FREQ 512000000ULL
#define MAX_CPUS 256
-#define XICS_IRQS 1024
#define PHANDLE_XICP 0x00001111
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
+
+typedef struct SPAPRMachine SPAPRMachine;
#define TYPE_SPAPR_MACHINE "spapr-machine"
+#define SPAPR_MACHINE(obj) \
+ OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+
+/**
+ * SPAPRMachine:
+ */
+struct SPAPRMachine {
+ /*< private >*/
+ MachineState parent_obj;
+
+ /*< public >*/
+ char *kvm_type;
+};
+
sPAPREnvironment *spapr;
@@ -202,34 +219,79 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs)
return icp;
}
+static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
+ int smt_threads)
+{
+ int i, ret = 0;
+ uint32_t servers_prop[smt_threads];
+ uint32_t gservers_prop[smt_threads * 2];
+ int index = ppc_get_vcpu_dt_id(cpu);
+
+ if (cpu->cpu_version) {
+ ret = fdt_setprop(fdt, offset, "cpu-version",
+ &cpu->cpu_version, sizeof(cpu->cpu_version));
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* Build interrupt servers and gservers properties */
+ for (i = 0; i < smt_threads; i++) {
+ servers_prop[i] = cpu_to_be32(index + i);
+ /* Hack, direct the group queues back to cpu 0 */
+ gservers_prop[i*2] = cpu_to_be32(index + i);
+ gservers_prop[i*2 + 1] = 0;
+ }
+ ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+ servers_prop, sizeof(servers_prop));
+ if (ret < 0) {
+ return ret;
+ }
+ ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
+ gservers_prop, sizeof(gservers_prop));
+
+ return ret;
+}
+
static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
{
- int ret = 0, offset;
- CPUState *cpu;
+ int ret = 0, offset, cpus_offset;
+ CPUState *cs;
char cpu_model[32];
int smt = kvmppc_smt_threads();
uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
- CPU_FOREACH(cpu) {
- DeviceClass *dc = DEVICE_GET_CLASS(cpu);
- int index = ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ DeviceClass *dc = DEVICE_GET_CLASS(cs);
+ int index = ppc_get_vcpu_dt_id(cpu);
uint32_t associativity[] = {cpu_to_be32(0x5),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
cpu_to_be32(0x0),
- cpu_to_be32(cpu->numa_node),
+ cpu_to_be32(cs->numa_node),
cpu_to_be32(index)};
if ((index % smt) != 0) {
continue;
}
- snprintf(cpu_model, 32, "/cpus/%s@%x", dc->fw_name,
- index);
+ snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
- offset = fdt_path_offset(fdt, cpu_model);
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ if (cpus_offset < 0) {
+ cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+ "cpus");
+ if (cpus_offset < 0) {
+ return cpus_offset;
+ }
+ }
+ offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
if (offset < 0) {
- return offset;
+ offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
+ if (offset < 0) {
+ return offset;
+ }
}
if (nb_numa_nodes > 1) {
@@ -245,6 +307,12 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
if (ret < 0) {
return ret;
}
+
+ ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
+ ppc_get_compat_smt_threads(cpu));
+ if (ret < 0) {
+ return ret;
+ }
}
return ret;
}
@@ -293,6 +361,10 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
} \
} while (0)
+static void add_str(GString *s, const gchar *s1)
+{
+ g_string_append_len(s, s1, strlen(s1) + 1);
+}
static void *spapr_create_fdt_skel(hwaddr initrd_base,
hwaddr initrd_size,
@@ -306,13 +378,26 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
CPUState *cs;
uint32_t start_prop = cpu_to_be32(initrd_base);
uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
- char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
- "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
- char qemu_hypertas_prop[] = "hcall-memop1";
+ GString *hypertas = g_string_sized_new(256);
+ GString *qemu_hypertas = g_string_sized_new(256);
uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
- int i, smt = kvmppc_smt_threads();
+ int smt = kvmppc_smt_threads();
unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
+ QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
+ unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
+ uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+
+ add_str(hypertas, "hcall-pft");
+ add_str(hypertas, "hcall-term");
+ add_str(hypertas, "hcall-dabr");
+ add_str(hypertas, "hcall-interrupt");
+ add_str(hypertas, "hcall-tce");
+ add_str(hypertas, "hcall-vio");
+ add_str(hypertas, "hcall-splpar");
+ add_str(hypertas, "hcall-bulk");
+ add_str(hypertas, "hcall-set-mode");
+ add_str(qemu_hypertas, "hcall-memop1");
fdt = g_malloc0(FDT_MAX_SIZE);
_FDT((fdt_create(fdt, FDT_MAX_SIZE)));
@@ -375,8 +460,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
DeviceClass *dc = DEVICE_GET_CLASS(cs);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
int index = ppc_get_vcpu_dt_id(cpu);
- uint32_t servers_prop[smp_threads];
- uint32_t gservers_prop[smp_threads * 2];
char *nodename;
uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
0xffffffff, 0xffffffff};
@@ -425,18 +508,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
_FDT((fdt_property_string(fdt, "status", "okay")));
_FDT((fdt_property(fdt, "64-bit", NULL, 0)));
- /* Build interrupt servers and gservers properties */
- for (i = 0; i < smp_threads; i++) {
- servers_prop[i] = cpu_to_be32(index + i);
- /* Hack, direct the group queues back to cpu 0 */
- gservers_prop[i*2] = cpu_to_be32(index + i);
- gservers_prop[i*2 + 1] = 0;
- }
- _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
- servers_prop, sizeof(servers_prop))));
- _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
- gservers_prop, sizeof(gservers_prop))));
-
if (env->spr_cb[SPR_PURR].oea_read) {
_FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
}
@@ -470,6 +541,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
page_sizes_prop, page_sizes_prop_size)));
}
+ _FDT((fdt_property_cell(fdt, "ibm,chip-id",
+ cs->cpu_index / cpus_per_socket)));
+
_FDT((fdt_end_node(fdt)));
}
@@ -478,10 +552,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
/* RTAS */
_FDT((fdt_begin_node(fdt, "rtas")));
- _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
- sizeof(hypertas_prop))));
- _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
- sizeof(qemu_hypertas_prop))));
+ if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
+ add_str(hypertas, "hcall-multi-tce");
+ }
+ _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
+ hypertas->len)));
+ g_string_free(hypertas, TRUE);
+ _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
+ qemu_hypertas->len)));
+ g_string_free(qemu_hypertas, TRUE);
_FDT((fdt_property(fdt, "ibm,associativity-reference-points",
refpoints, sizeof(refpoints))));
@@ -521,12 +600,68 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
/* event-sources */
spapr_events_fdt_skel(fdt, epow_irq);
+ /* /hypervisor node */
+ if (kvm_enabled()) {
+ uint8_t hypercall[16];
+
+ /* indicate KVM hypercall interface */
+ _FDT((fdt_begin_node(fdt, "hypervisor")));
+ _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
+ if (kvmppc_has_cap_fixup_hcalls()) {
+ /*
+ * Older KVM versions with older guest kernels were broken with the
+ * magic page, don't allow the guest to map it.
+ */
+ kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
+ sizeof(hypercall));
+ _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
+ sizeof(hypercall))));
+ }
+ _FDT((fdt_end_node(fdt)));
+ }
+
_FDT((fdt_end_node(fdt))); /* close root node */
_FDT((fdt_finish(fdt)));
return fdt;
}
+int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
+{
+ void *fdt, *fdt_skel;
+ sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
+
+ size -= sizeof(hdr);
+
+ /* Create sceleton */
+ fdt_skel = g_malloc0(size);
+ _FDT((fdt_create(fdt_skel, size)));
+ _FDT((fdt_begin_node(fdt_skel, "")));
+ _FDT((fdt_end_node(fdt_skel)));
+ _FDT((fdt_finish(fdt_skel)));
+ fdt = g_malloc0(size);
+ _FDT((fdt_open_into(fdt_skel, fdt, size)));
+ g_free(fdt_skel);
+
+ /* Fix skeleton up */
+ _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
+
+ /* Pack resulting tree */
+ _FDT((fdt_pack(fdt)));
+
+ if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
+ trace_spapr_cas_failed(size);
+ return -1;
+ }
+
+ cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
+ cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
+ trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
+ g_free(fdt);
+
+ return 0;
+}
+
static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
{
uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
@@ -817,14 +952,14 @@ static int spapr_vga_init(PCIBus *pci_bus)
static const VMStateDescription vmstate_spapr = {
.name = "spapr",
- .version_id = 1,
+ .version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(next_irq, sPAPREnvironment),
/* RTC offset */
VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
-
+ VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
VMSTATE_END_OF_LIST()
},
};
@@ -1251,6 +1386,12 @@ static void ppc_spapr_init(MachineState *machine)
kvmppc_set_papr(cpu);
}
+ if (cpu->max_compat) {
+ if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
+ exit(1);
+ }
+ }
+
xics_cpu_setup(spapr->icp, cpu);
qemu_register_reset(spapr_cpu_reset, cpu);
@@ -1475,6 +1616,27 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
return NULL;
}
+static char *spapr_get_kvm_type(Object *obj, Error **errp)
+{
+ SPAPRMachine *sm = SPAPR_MACHINE(obj);
+
+ return g_strdup(sm->kvm_type);
+}
+
+static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
+{
+ SPAPRMachine *sm = SPAPR_MACHINE(obj);
+
+ g_free(sm->kvm_type);
+ sm->kvm_type = g_strdup(value);
+}
+
+static void spapr_machine_initfn(Object *obj)
+{
+ object_property_add_str(obj, "kvm-type",
+ spapr_get_kvm_type, spapr_set_kvm_type, NULL);
+}
+
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1497,6 +1659,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
static const TypeInfo spapr_machine_info = {
.name = TYPE_SPAPR_MACHINE,
.parent = TYPE_MACHINE,
+ .instance_size = sizeof(SPAPRMachine),
+ .instance_init = spapr_machine_initfn,
.class_init = spapr_machine_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_FW_PATH_PROVIDER },
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0bae053..7952077 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -3,6 +3,9 @@
#include "helper_regs.h"
#include "hw/ppc/spapr.h"
#include "mmu-hash64.h"
+#include "cpu-models.h"
+#include "trace.h"
+#include "kvm_ppc.h"
struct SPRSyncState {
CPUState *cs;
@@ -709,47 +712,218 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
+static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
+ target_ulong mflags,
+ target_ulong value1,
+ target_ulong value2)
+{
+ CPUState *cs;
+
+ if (value1) {
+ return H_P3;
+ }
+ if (value2) {
+ return H_P4;
+ }
+
+ switch (mflags) {
+ case H_SET_MODE_ENDIAN_BIG:
+ CPU_FOREACH(cs) {
+ set_spr(cs, SPR_LPCR, 0, LPCR_ILE);
+ }
+ return H_SUCCESS;
+
+ case H_SET_MODE_ENDIAN_LITTLE:
+ CPU_FOREACH(cs) {
+ set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE);
+ }
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED_FLAG;
+}
+
+static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu,
+ target_ulong mflags,
+ target_ulong value1,
+ target_ulong value2)
+{
+ CPUState *cs;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ target_ulong prefix;
+
+ if (!(pcc->insns_flags2 & PPC2_ISA207S)) {
+ return H_P2;
+ }
+ if (value1) {
+ return H_P3;
+ }
+ if (value2) {
+ return H_P4;
+ }
+
+ switch (mflags) {
+ case H_SET_MODE_ADDR_TRANS_NONE:
+ prefix = 0;
+ break;
+ case H_SET_MODE_ADDR_TRANS_0001_8000:
+ prefix = 0x18000;
+ break;
+ case H_SET_MODE_ADDR_TRANS_C000_0000_0000_4000:
+ prefix = 0xC000000000004000;
+ break;
+ default:
+ return H_UNSUPPORTED_FLAG;
+ }
+
+ CPU_FOREACH(cs) {
+ CPUPPCState *env = &POWERPC_CPU(cpu)->env;
+
+ set_spr(cs, SPR_LPCR, mflags << LPCR_AIL_SHIFT, LPCR_AIL);
+ env->excp_prefix = prefix;
+ }
+
+ return H_SUCCESS;
+}
+
static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
target_ulong opcode, target_ulong *args)
{
- CPUState *cs;
- target_ulong mflags = args[0];
target_ulong resource = args[1];
- target_ulong value1 = args[2];
- target_ulong value2 = args[3];
target_ulong ret = H_P2;
- if (resource == H_SET_MODE_RESOURCE_LE) {
- if (value1) {
- ret = H_P3;
- goto out;
- }
- if (value2) {
- ret = H_P4;
- goto out;
- }
- switch (mflags) {
- case H_SET_MODE_ENDIAN_BIG:
- CPU_FOREACH(cs) {
- set_spr(cs, SPR_LPCR, 0, LPCR_ILE);
- }
- ret = H_SUCCESS;
- break;
+ switch (resource) {
+ case H_SET_MODE_RESOURCE_LE:
+ ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]);
+ break;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0],
+ args[2], args[3]);
+ break;
+ }
+
+ return ret;
+}
+
+typedef struct {
+ PowerPCCPU *cpu;
+ uint32_t cpu_version;
+ int ret;
+} SetCompatState;
- case H_SET_MODE_ENDIAN_LITTLE:
- CPU_FOREACH(cs) {
- set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE);
+static void do_set_compat(void *arg)
+{
+ SetCompatState *s = arg;
+
+ cpu_synchronize_state(CPU(s->cpu));
+ s->ret = ppc_set_compat(s->cpu, s->cpu_version);
+}
+
+#define get_compat_level(cpuver) ( \
+ ((cpuver) == CPU_POWERPC_LOGICAL_2_05) ? 2050 : \
+ ((cpuver) == CPU_POWERPC_LOGICAL_2_06) ? 2060 : \
+ ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \
+ ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
+
+static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
+ sPAPREnvironment *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ target_ulong list = args[0];
+ PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
+ CPUState *cs;
+ bool cpu_match = false;
+ unsigned old_cpu_version = cpu_->cpu_version;
+ unsigned compat_lvl = 0, cpu_version = 0;
+ unsigned max_lvl = get_compat_level(cpu_->max_compat);
+ int counter;
+
+ /* Parse PVR list */
+ for (counter = 0; counter < 512; ++counter) {
+ uint32_t pvr, pvr_mask;
+
+ pvr_mask = rtas_ld(list, 0);
+ list += 4;
+ pvr = rtas_ld(list, 0);
+ list += 4;
+
+ trace_spapr_cas_pvr_try(pvr);
+ if (!max_lvl &&
+ ((cpu_->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask))) {
+ cpu_match = true;
+ cpu_version = 0;
+ } else if (pvr == cpu_->cpu_version) {
+ cpu_match = true;
+ cpu_version = cpu_->cpu_version;
+ } else if (!cpu_match) {
+ /* If it is a logical PVR, try to determine the highest level */
+ unsigned lvl = get_compat_level(pvr);
+ if (lvl) {
+ bool is205 = (pcc_->pcr_mask & PCR_COMPAT_2_05) &&
+ (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05));
+ bool is206 = (pcc_->pcr_mask & PCR_COMPAT_2_06) &&
+ ((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) ||
+ (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS)));
+
+ if (is205 || is206) {
+ if (!max_lvl) {
+ /* User did not set the level, choose the highest */
+ if (compat_lvl <= lvl) {
+ compat_lvl = lvl;
+ cpu_version = pvr;
+ }
+ } else if (max_lvl >= lvl) {
+ /* User chose the level, don't set higher than this */
+ compat_lvl = lvl;
+ cpu_version = pvr;
+ }
+ }
}
- ret = H_SUCCESS;
+ }
+ /* Terminator record */
+ if (~pvr_mask & pvr) {
break;
+ }
+ }
- default:
- ret = H_UNSUPPORTED_FLAG;
+ /* For the future use: here @list points to the first capability */
+
+ /* Parsing finished */
+ trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match,
+ cpu_version, pcc_->pcr_mask);
+
+ /* Update CPUs */
+ if (old_cpu_version != cpu_version) {
+ CPU_FOREACH(cs) {
+ SetCompatState s = {
+ .cpu = POWERPC_CPU(cs),
+ .cpu_version = cpu_version,
+ .ret = 0
+ };
+
+ run_on_cpu(cs, do_set_compat, &s);
+
+ if (s.ret < 0) {
+ fprintf(stderr, "Unable to set compatibility mode\n");
+ return H_HARDWARE;
+ }
}
}
-out:
- return ret;
+ if (!cpu_version) {
+ return H_SUCCESS;
+ }
+
+ if (!list) {
+ return H_SUCCESS;
+ }
+
+ if (spapr_h_cas_compose_response(args[1], args[2])) {
+ qemu_system_reset_request();
+ }
+
+ return H_SUCCESS;
}
static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
@@ -831,6 +1005,9 @@ static void hypercall_register_types(void)
spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
spapr_register_hypercall(H_SET_MODE, h_set_mode);
+
+ /* ibm,client-architecture-support support */
+ spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
}
type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 2887ad4..9e49ec4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -35,6 +35,9 @@ enum sPAPRTCEAccess {
SPAPR_TCE_RW = 3,
};
+#define IOMMU_PAGE_SIZE(shift) (1ULL << (shift))
+#define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1))
+
static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
@@ -70,12 +73,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
if (tcet->bypass) {
ret.perm = IOMMU_RW;
- } else if (addr < tcet->window_size) {
+ } else if ((addr >> tcet->page_shift) < tcet->nb_table) {
/* Check if we are in bound */
- tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
- ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
- ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
- ret.addr_mask = SPAPR_TCE_PAGE_MASK;
+ hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+ tce = tcet->table[addr >> tcet->page_shift];
+ ret.iova = addr & page_mask;
+ ret.translated_addr = tce & page_mask;
+ ret.addr_mask = ~page_mask;
ret.perm = tce;
}
trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
@@ -84,24 +89,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
return ret;
}
-static int spapr_tce_table_pre_load(void *opaque)
-{
- sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
-
- tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
-
- return 0;
-}
-
static const VMStateDescription vmstate_spapr_tce_table = {
.name = "spapr_iommu",
- .version_id = 1,
- .minimum_version_id = 1,
- .pre_load = spapr_tce_table_pre_load,
- .fields = (VMStateField[]) {
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .fields = (VMStateField []) {
/* Sanity check */
VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
- VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
+ VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable),
/* IOMMU state */
VMSTATE_BOOL(bypass, sPAPRTCETable),
@@ -121,28 +116,33 @@ static int spapr_tce_table_realize(DeviceState *dev)
if (kvm_enabled()) {
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
- tcet->window_size,
+ tcet->nb_table <<
+ tcet->page_shift,
&tcet->fd);
}
if (!tcet->table) {
- size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
- * sizeof(uint64_t);
+ size_t table_size = tcet->nb_table * sizeof(uint64_t);
tcet->table = g_malloc0(table_size);
}
- tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
- "iommu-spapr", UINT64_MAX);
+ "iommu-spapr", ram_size);
QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
+ vmstate_register(DEVICE(tcet), tcet->liobn, &vmstate_spapr_tce_table,
+ tcet);
+
return 0;
}
-sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t window_size)
+sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+ uint64_t bus_offset,
+ uint32_t page_shift,
+ uint32_t nb_table)
{
sPAPRTCETable *tcet;
@@ -152,17 +152,19 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t wi
return NULL;
}
- if (!window_size) {
+ if (!nb_table) {
return NULL;
}
tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
tcet->liobn = liobn;
- tcet->window_size = window_size;
+ tcet->bus_offset = bus_offset;
+ tcet->page_shift = page_shift;
+ tcet->nb_table = nb_table;
object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
- qdev_init_nofail(DEVICE(tcet));
+ object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
return tcet;
}
@@ -175,7 +177,7 @@ static void spapr_tce_table_finalize(Object *obj)
if (!kvm_enabled() ||
(kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
- tcet->window_size) != 0)) {
+ tcet->nb_table) != 0)) {
g_free(tcet->table);
}
}
@@ -193,8 +195,7 @@ void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
static void spapr_tce_reset(DeviceState *dev)
{
sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
- size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
- * sizeof(uint64_t);
+ size_t table_size = tcet->nb_table * sizeof(uint64_t);
tcet->bypass = false;
memset(tcet->table, 0, table_size);
@@ -204,25 +205,110 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
target_ulong tce)
{
IOMMUTLBEntry entry;
+ hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+ unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
- if (ioba >= tcet->window_size) {
+ if (index >= tcet->nb_table) {
hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
TARGET_FMT_lx "\n", ioba);
return H_PARAMETER;
}
- tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
+ tcet->table[index] = tce;
entry.target_as = &address_space_memory,
- entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
- entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
- entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+ entry.iova = ioba & page_mask;
+ entry.translated_addr = tce & page_mask;
+ entry.addr_mask = ~page_mask;
entry.perm = tce;
memory_region_notify_iommu(&tcet->iommu, entry);
return H_SUCCESS;
}
+static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
+ sPAPREnvironment *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ int i;
+ target_ulong liobn = args[0];
+ target_ulong ioba = args[1];
+ target_ulong ioba1 = ioba;
+ target_ulong tce_list = args[2];
+ target_ulong npages = args[3];
+ target_ulong ret = H_PARAMETER;
+ sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+ CPUState *cs = CPU(cpu);
+ hwaddr page_mask, page_size;
+
+ if (!tcet) {
+ return H_PARAMETER;
+ }
+
+ if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) {
+ return H_PARAMETER;
+ }
+
+ page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+ page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+ ioba &= page_mask;
+
+ for (i = 0; i < npages; ++i, ioba += page_size) {
+ target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
+ i * sizeof(target_ulong);
+ target_ulong tce = ldq_phys(cs->as, off);
+
+ ret = put_tce_emu(tcet, ioba, tce);
+ if (ret) {
+ break;
+ }
+ }
+
+ /* Trace last successful or the first problematic entry */
+ i = i ? (i - 1) : 0;
+ trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i,
+ ldq_phys(cs->as,
+ tce_list + i * sizeof(target_ulong)),
+ ret);
+
+ return ret;
+}
+
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ int i;
+ target_ulong liobn = args[0];
+ target_ulong ioba = args[1];
+ target_ulong tce_value = args[2];
+ target_ulong npages = args[3];
+ target_ulong ret = H_PARAMETER;
+ sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+ hwaddr page_mask, page_size;
+
+ if (!tcet) {
+ return H_PARAMETER;
+ }
+
+ if (npages > tcet->nb_table) {
+ return H_PARAMETER;
+ }
+
+ page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+ page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+ ioba &= page_mask;
+
+ for (i = 0; i < npages; ++i, ioba += page_size) {
+ ret = put_tce_emu(tcet, ioba, tce_value);
+ if (ret) {
+ break;
+ }
+ }
+ trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
+
+ return ret;
+}
+
static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
target_ulong opcode, target_ulong *args)
{
@@ -232,9 +318,11 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
target_ulong ret = H_PARAMETER;
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
- ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
-
if (tcet) {
+ hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+ ioba &= page_mask;
+
ret = put_tce_emu(tcet, ioba, tce);
}
trace_spapr_iommu_put(liobn, ioba, tce, ret);
@@ -245,13 +333,15 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
target_ulong *tce)
{
- if (ioba >= tcet->window_size) {
+ unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
+
+ if (index >= tcet->nb_table) {
hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
TARGET_FMT_lx "\n", ioba);
return H_PARAMETER;
}
- *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT];
+ *tce = tcet->table[index];
return H_SUCCESS;
}
@@ -265,9 +355,11 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
target_ulong ret = H_PARAMETER;
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
- ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
-
if (tcet) {
+ hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+ ioba &= page_mask;
+
ret = get_tce_emu(tcet, ioba, &tce);
if (!ret) {
args[0] = tce;
@@ -316,13 +408,12 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
}
return spapr_dma_dt(fdt, node_off, propname,
- tcet->liobn, 0, tcet->window_size);
+ tcet->liobn, 0, tcet->nb_table << tcet->page_shift);
}
static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->vmsd = &vmstate_spapr_tce_table;
dc->init = spapr_tce_table_realize;
dc->reset = spapr_tce_reset;
@@ -331,6 +422,8 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
/* hcall-tce */
spapr_register_hypercall(H_PUT_TCE, h_put_tce);
spapr_register_hypercall(H_GET_TCE, h_get_tce);
+ spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
+ spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
}
static TypeInfo spapr_tce_table_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index a4a51d4..988f8cb 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -280,7 +280,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
unsigned int seq_num = rtas_ld(args, 5);
unsigned int ret_intr_type;
- int ndev, irq;
+ int ndev, irq, max_irqs = 0;
sPAPRPHBState *phb = NULL;
PCIDevice *pdev = NULL;
@@ -333,6 +333,23 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
+ /* Check if the device supports as many IRQs as requested */
+ if (ret_intr_type == RTAS_TYPE_MSI) {
+ max_irqs = msi_nr_vectors_allocated(pdev);
+ } else if (ret_intr_type == RTAS_TYPE_MSIX) {
+ max_irqs = pdev->msix_entries_nr;
+ }
+ if (!max_irqs) {
+ error_report("Requested interrupt type %d is not enabled for device#%d",
+ ret_intr_type, ndev);
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ /* Correct the number if the guest asked for too many */
+ if (req_num > max_irqs) {
+ req_num = max_irqs;
+ }
+
/* Check if there is an old config and MSI number has not changed */
if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
/* Unexpected behaviour */
@@ -511,6 +528,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
SysBusDevice *s = SYS_BUS_DEVICE(dev);
sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
PCIHostState *phb = PCI_HOST_BRIDGE(s);
+ sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
char *namebuf;
int i;
PCIBus *bus;
@@ -575,23 +593,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
&sphb->memwindow);
- /* On ppc, we only have MMIO no specific IO space from the CPU
- * perspective. In theory we ought to be able to embed the PCI IO
- * memory region direction in the system memory space. However,
- * if any of the IO BAR subregions use the old_portio mechanism,
- * that won't be processed properly unless accessed from the
- * system io address space. This hack to bounce things via
- * system_io works around the problem until all the users of
- * old_portion are updated */
+ /* Initialize IO regions */
sprintf(namebuf, "%s.io", sphb->dtbusname);
memory_region_init(&sphb->iospace, OBJECT(sphb),
namebuf, SPAPR_PCI_IO_WIN_SIZE);
- /* FIXME: fix to support multiple PHBs */
- memory_region_add_subregion(get_system_io(), 0, &sphb->iospace);
sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
- get_system_io(), 0, SPAPR_PCI_IO_WIN_SIZE);
+ &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
&sphb->iowindow);
@@ -601,16 +610,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
phb->bus = bus;
- sphb->dma_window_start = 0;
- sphb->dma_window_size = 0x40000000;
- sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
- sphb->dma_window_size);
- if (!sphb->tcet) {
- error_setg(errp, "Unable to create TCE table for %s",
- sphb->dtbusname);
- return;
- }
- address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
+ /*
+ * Initialize PHB address space.
+ * By default there will be at least one subregion for default
+ * 32bit DMA window.
+ * Later the guest might want to create another DMA window
+ * which will become another memory subregion.
+ */
+ sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
+
+ memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+ namebuf, UINT64_MAX);
+ address_space_init(&sphb->iommu_as, &sphb->iommu_root,
sphb->dtbusname);
pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
@@ -631,15 +642,49 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
sphb->lsi_table[i].irq = irq;
}
+
+ if (!info->finish_realize) {
+ error_setg(errp, "finish_realize not defined");
+ return;
+ }
+
+ info->finish_realize(sphb, errp);
}
-static void spapr_phb_reset(DeviceState *qdev)
+static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
{
- SysBusDevice *s = SYS_BUS_DEVICE(qdev);
- sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+ sPAPRTCETable *tcet;
+ tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+ 0,
+ SPAPR_TCE_PAGE_SHIFT,
+ 0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+ if (!tcet) {
+ error_setg(errp, "Unable to create TCE table for %s",
+ sphb->dtbusname);
+ return ;
+ }
+
+ /* Register default 32bit DMA window */
+ memory_region_add_subregion(&sphb->iommu_root, 0,
+ spapr_tce_get_iommu(tcet));
+}
+
+static int spapr_phb_children_reset(Object *child, void *opaque)
+{
+ DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
+
+ if (dev) {
+ device_reset(dev);
+ }
+
+ return 0;
+}
+
+static void spapr_phb_reset(DeviceState *qdev)
+{
/* Reset the IOMMU state */
- device_reset(DEVICE(sphb->tcet));
+ object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
}
static Property spapr_phb_properties[] = {
@@ -711,6 +756,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
{
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
+ sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
hc->root_bus_path = spapr_phb_root_bus_path;
dc->realize = spapr_phb_realize;
@@ -719,6 +765,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_spapr_pci;
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->cannot_instantiate_with_device_add_yet = false;
+ spc->finish_realize = spapr_phb_finish_realize;
}
static const TypeInfo spapr_phb_info = {
@@ -726,6 +773,7 @@ static const TypeInfo spapr_phb_info = {
.parent = TYPE_PCI_HOST_BRIDGE,
.instance_size = sizeof(sPAPRPHBState),
.class_init = spapr_phb_class_init,
+ .class_size = sizeof(sPAPRPHBClass),
};
PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
@@ -750,6 +798,29 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
#define b_fff(x) b_x((x), 8, 3) /* function number */
#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */
+typedef struct sPAPRTCEDT {
+ void *fdt;
+ int node_off;
+} sPAPRTCEDT;
+
+static int spapr_phb_children_dt(Object *child, void *opaque)
+{
+ sPAPRTCEDT *p = opaque;
+ sPAPRTCETable *tcet;
+
+ tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+ if (!tcet) {
+ return 0;
+ }
+
+ spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
+ tcet->liobn, tcet->bus_offset,
+ tcet->nb_table << tcet->page_shift);
+ /* Stop after the first window */
+
+ return 1;
+}
+
int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt)
@@ -805,6 +876,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
_FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+ _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
/* Build the interrupt-map, this must matches what is done
* in pci_spapr_map_irq
@@ -829,9 +901,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
sizeof(interrupt_map)));
- spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
- phb->dma_liobn, phb->dma_window_start,
- phb->dma_window_size);
+ object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
+ &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
return 0;
}
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index bce8d7f..04e16ae 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -456,7 +456,11 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
if (pc->rtce_window_size) {
uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
- dev->tcet = spapr_tce_new_table(qdev, liobn, pc->rtce_window_size);
+ dev->tcet = spapr_tce_new_table(qdev, liobn,
+ 0,
+ SPAPR_TCE_PAGE_SHIFT,
+ pc->rtce_window_size >>
+ SPAPR_TCE_PAGE_SHIFT);
address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
}