aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS21
-rw-r--r--block/file-posix.c27
-rw-r--r--block/io.c2
-rw-r--r--default-configs/devices/ppc-softmmu.mak2
-rw-r--r--docs/pcie_pci_bridge.txt6
-rw-r--r--docs/system/deprecated.rst18
-rw-r--r--docs/system/ppc/ppce500.rst10
-rw-r--r--hw/acpi/generic_event_device.c2
-rw-r--r--hw/block/block.c42
-rw-r--r--hw/block/dataplane/virtio-blk.c16
-rw-r--r--hw/hyperv/vmbus.c20
-rw-r--r--hw/i386/acpi-build.c9
-rw-r--r--hw/net/virtio-net.c1
-rw-r--r--hw/pci-host/q35.c3
-rw-r--r--hw/ppc/Kconfig5
-rw-r--r--hw/ppc/meson.build3
-rw-r--r--hw/ppc/pegasos2.c789
-rw-r--r--hw/ppc/spapr.c77
-rw-r--r--hw/ppc/spapr_caps.c41
-rw-r--r--hw/ppc/spapr_hcall.c24
-rw-r--r--hw/ppc/spapr_vof.c167
-rw-r--r--hw/ppc/trace-events24
-rw-r--r--hw/ppc/vof.c1053
-rw-r--r--hw/s390x/virtio-ccw.c6
-rw-r--r--hw/scsi/virtio-scsi-dataplane.c16
-rw-r--r--hw/vfio/common.c315
-rw-r--r--hw/virtio/virtio-mem.c391
-rw-r--r--hw/virtio/virtio-mmio.c6
-rw-r--r--hw/virtio/virtio-pci.c33
-rw-r--r--hw/virtio/virtio.c16
-rw-r--r--include/block/aio.h31
-rw-r--r--include/exec/memory.h324
-rw-r--r--include/hw/block/block.h3
-rw-r--r--include/hw/ppc/spapr.h31
-rw-r--r--include/hw/ppc/vof.h58
-rw-r--r--include/hw/vfio/vfio-common.h12
-rw-r--r--include/hw/virtio/virtio-mem.h3
-rw-r--r--include/migration/vmstate.h1
-rw-r--r--include/qemu/main-loop.h4
-rw-r--r--include/standard-headers/asm-x86/kvm_para.h13
-rw-r--r--include/standard-headers/drm/drm_fourcc.h7
-rw-r--r--include/standard-headers/linux/ethtool.h4
-rw-r--r--include/standard-headers/linux/input-event-codes.h1
-rw-r--r--include/standard-headers/linux/virtio_ids.h2
-rw-r--r--include/standard-headers/linux/virtio_vsock.h9
-rw-r--r--linux-headers/asm-arm64/kvm.h11
-rw-r--r--linux-headers/asm-generic/mman-common.h3
-rw-r--r--linux-headers/asm-generic/unistd.h4
-rw-r--r--linux-headers/asm-mips/mman.h3
-rw-r--r--linux-headers/asm-mips/unistd_n32.h1
-rw-r--r--linux-headers/asm-mips/unistd_n64.h1
-rw-r--r--linux-headers/asm-mips/unistd_o32.h1
-rw-r--r--linux-headers/asm-powerpc/unistd_32.h1
-rw-r--r--linux-headers/asm-powerpc/unistd_64.h1
-rw-r--r--linux-headers/asm-s390/unistd_32.h1
-rw-r--r--linux-headers/asm-s390/unistd_64.h1
-rw-r--r--linux-headers/asm-x86/kvm.h13
-rw-r--r--linux-headers/asm-x86/unistd_32.h7
-rw-r--r--linux-headers/asm-x86/unistd_64.h7
-rw-r--r--linux-headers/asm-x86/unistd_x32.h7
-rw-r--r--linux-headers/linux/kvm.h105
-rw-r--r--linux-headers/linux/userfaultfd.h11
-rw-r--r--pc-bios/README4
-rw-r--r--pc-bios/u-boot.e500bin406920 -> 421720 bytes
-rw-r--r--pc-bios/vof-nvram.binbin0 -> 16384 bytes
-rwxr-xr-xpc-bios/vof.binbin0 -> 3456 bytes
-rw-r--r--pc-bios/vof/Makefile23
-rw-r--r--pc-bios/vof/bootmem.c14
-rw-r--r--pc-bios/vof/ci.c91
-rw-r--r--pc-bios/vof/entry.S49
-rw-r--r--pc-bios/vof/libc.c66
-rw-r--r--pc-bios/vof/main.c21
-rw-r--r--pc-bios/vof/vof.h41
-rw-r--r--pc-bios/vof/vof.lds48
m---------roms/u-boot0
-rw-r--r--softmmu/memory.c98
-rw-r--r--softmmu/physmem.c108
-rw-r--r--target/ppc/arch_dump.c8
-rw-r--r--target/ppc/cpu-qom.h2
-rw-r--r--target/ppc/cpu.c2
-rw-r--r--target/ppc/cpu.h15
-rw-r--r--target/ppc/cpu_init.c64
-rw-r--r--target/ppc/excp_helper.c3
-rw-r--r--target/ppc/kvm.c12
-rw-r--r--target/ppc/kvm_ppc.h12
-rw-r--r--target/ppc/mmu-book3s-v3.c19
-rw-r--r--target/ppc/mmu-book3s-v3.h6
-rw-r--r--target/ppc/mmu-books.h30
-rw-r--r--target/ppc/mmu-hash32.c254
-rw-r--r--target/ppc/mmu-hash32.h8
-rw-r--r--target/ppc/mmu-hash64.c157
-rw-r--r--target/ppc/mmu-hash64.h6
-rw-r--r--target/ppc/mmu-radix64.c151
-rw-r--r--target/ppc/mmu-radix64.h6
-rw-r--r--target/ppc/mmu_helper.c215
-rw-r--r--target/ppc/translate.c5
-rw-r--r--tests/data/acpi/pc/DSDTbin6002 -> 6002 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.acpihmatbin7327 -> 7327 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.bridgebin8668 -> 8668 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.cphpbin6466 -> 6466 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.dimmpxmbin7656 -> 7656 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.hpbridgebin5969 -> 5969 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.ipmikcsbin6074 -> 6074 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.memhpbin7361 -> 7361 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.nohpetbin5860 -> 5860 bytes
-rw-r--r--tests/data/acpi/pc/DSDT.numamembin6008 -> 6008 bytes
-rw-r--r--tests/qemu-iotests/172.out38
-rw-r--r--tests/qtest/rtas-test.c15
-rw-r--r--tests/unit/ptimer-test-stubs.c2
-rw-r--r--util/async.c25
-rw-r--r--util/main-loop.c4
-rw-r--r--util/mmap-alloc.c2
112 files changed, 4699 insertions, 751 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0e9f338..6c27914 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1360,6 +1360,18 @@ F: hw/pci-host/mv64361.c
F: hw/pci-host/mv643xx.h
F: include/hw/pci-host/mv64361.h
+Virtual Open Firmware (VOF)
+M: Alexey Kardashevskiy <aik@ozlabs.ru>
+R: David Gibson <david@gibson.dropbear.id.au>
+R: Greg Kurz <groug@kaod.org>
+L: qemu-ppc@nongnu.org
+S: Maintained
+F: hw/ppc/spapr_vof*
+F: hw/ppc/vof*
+F: include/hw/ppc/vof*
+F: pc-bios/vof/*
+F: pc-bios/vof*
+
RISC-V Machines
---------------
OpenTitan
@@ -1952,6 +1964,15 @@ F: include/sysemu/rng*.h
F: backends/rng*.c
F: tests/qtest/virtio-rng-test.c
+vhost-user-rng
+M: Mathieu Poirier <mathieu.poirier@linaro.org>
+S: Supported
+F: docs/tools/vhost-user-rng.rst
+F: hw/virtio/vhost-user-rng.c
+F: hw/virtio/vhost-user-rng-pci.c
+F: include/hw/virtio/vhost-user-rng.h
+F: tools/vhost-user-rng/*
+
virtio-crypto
M: Gonglei <arei.gonglei@huawei.com>
S: Supported
diff --git a/block/file-posix.c b/block/file-posix.c
index a26eab0..cb9bffe 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -46,6 +46,7 @@
#if defined(HAVE_HOST_BLOCK_DEVICE)
#include <paths.h>
#include <sys/param.h>
+#include <sys/mount.h>
#include <IOKit/IOKitLib.h>
#include <IOKit/IOBSD.h>
#include <IOKit/storage/IOMediaBSDClient.h>
@@ -1254,6 +1255,15 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
return;
}
+#if defined(__APPLE__) && (__MACH__)
+ struct statfs buf;
+
+ if (!fstatfs(s->fd, &buf)) {
+ bs->bl.opt_transfer = buf.f_iosize;
+ bs->bl.pdiscard_alignment = buf.f_bsize;
+ }
+#endif
+
if (bs->sg || S_ISBLK(st.st_mode)) {
int ret = hdev_get_max_hw_transfer(s->fd, &st);
@@ -1591,6 +1601,7 @@ out:
}
}
+#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD)
static int translate_err(int err)
{
if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1599,6 +1610,7 @@ static int translate_err(int err)
}
return err;
}
+#endif
#ifdef CONFIG_FALLOCATE
static int do_fallocate(int fd, int mode, off_t offset, off_t len)
@@ -1811,16 +1823,27 @@ static int handle_aiocb_discard(void *opaque)
}
} while (errno == EINTR);
- ret = -errno;
+ ret = translate_err(-errno);
#endif
} else {
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes);
+ ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+ fpunchhole_t fpunchhole;
+ fpunchhole.fp_flags = 0;
+ fpunchhole.reserved = 0;
+ fpunchhole.fp_offset = aiocb->aio_offset;
+ fpunchhole.fp_length = aiocb->aio_nbytes;
+ if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+ ret = errno == ENODEV ? -ENOTSUP : -errno;
+ } else {
+ ret = 0;
+ }
#endif
}
- ret = translate_err(ret);
if (ret == -ENOTSUP) {
s->has_discard = false;
}
diff --git a/block/io.c b/block/io.c
index cf177a9..e0a689c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -125,6 +125,8 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
+ dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
+ src->pdiscard_alignment);
dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
diff --git a/default-configs/devices/ppc-softmmu.mak b/default-configs/devices/ppc-softmmu.mak
index c2d4119..4535993 100644
--- a/default-configs/devices/ppc-softmmu.mak
+++ b/default-configs/devices/ppc-softmmu.mak
@@ -14,7 +14,7 @@ CONFIG_SAM460EX=y
CONFIG_MAC_OLDWORLD=y
CONFIG_MAC_NEWWORLD=y
-CONFIG_PEGASOS2=n
+CONFIG_PEGASOS2=y
# For PReP
CONFIG_PREP=y
diff --git a/docs/pcie_pci_bridge.txt b/docs/pcie_pci_bridge.txt
index ab35ebf..1aa08fc 100644
--- a/docs/pcie_pci_bridge.txt
+++ b/docs/pcie_pci_bridge.txt
@@ -70,9 +70,9 @@ A detailed command line would be:
[qemu-bin + storage options] \
-m 2G \
--device pcie-root-port,bus=pcie.0,id=rp1 \
--device pcie-root-port,bus=pcie.0,id=rp2 \
--device pcie-root-port,bus=pcie.0,id=rp3,bus-reserve=1 \
+-device pcie-root-port,bus=pcie.0,id=rp1,slot=1 \
+-device pcie-root-port,bus=pcie.0,id=rp2,slot=2 \
+-device pcie-root-port,bus=pcie.0,id=rp3,slot=3,bus-reserve=1 \
-device pcie-pci-bridge,id=br1,bus=rp1 \
-device pcie-pci-bridge,id=br2,bus=rp2 \
-device e1000,bus=br1,addr=8
diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 25d6c4c..6d438f1 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -221,6 +221,24 @@ This machine is deprecated because we have enough AST2500 based OpenPOWER
machines. It can be easily replaced by the ``witherspoon-bmc`` or the
``romulus-bmc`` machines.
+Backend options
+---------------
+
+Using non-persistent backing file with pmem=on (since 6.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+This option is used when ``memory-backend-file`` is consumed by emulated NVDIMM
+device. However enabling ``memory-backend-file.pmem`` option, when backing file
+is (a) not DAX capable or (b) not on a filesystem that support direct mapping
+of persistent memory, is not safe and may lead to data loss or corruption in case
+of host crash.
+Options are:
+
+ - modify VM configuration to set ``pmem=off`` to continue using fake NVDIMM
+ (without persistence guaranties) with backing file on non DAX storage
+ - move backing file to NVDIMM storage and keep ``pmem=on``
+ (to have NVDIMM with persistence guaranties).
+
Device options
--------------
diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst
index 7a815c1..afc58f6 100644
--- a/docs/system/ppc/ppce500.rst
+++ b/docs/system/ppc/ppce500.rst
@@ -19,6 +19,7 @@ The ``ppce500`` machine supports the following devices:
* Power-off functionality via one GPIO pin
* 1 Freescale MPC8xxx PCI host controller
* VirtIO devices via PCI bus
+* 1 Freescale Enhanced Triple Speed Ethernet controller (eTSEC)
Hardware configuration information
----------------------------------
@@ -121,7 +122,7 @@ To boot the 32-bit Linux kernel:
Running U-Boot
--------------
-U-Boot mainline v2021.04 release is tested at the time of writing. To build a
+U-Boot mainline v2021.07 release is tested at the time of writing. To build a
U-Boot mainline bootloader that can be booted by the ``ppce500`` machine, use
the qemu-ppce500_defconfig with similar commands as described above for Linux:
@@ -154,3 +155,10 @@ interface at PCI address 0.1.0, but we can switch that to an e1000 NIC by:
-display none -serial stdio \
-bios u-boot \
-nic tap,ifname=tap0,script=no,downscript=no,model=e1000
+
+The QEMU ``ppce500`` machine can also dynamically instantiate an eTSEC device
+if “-device eTSEC” is given to QEMU:
+
+.. code-block:: bash
+
+ -netdev tap,ifname=tap0,script=no,downscript=no,id=net0 -device eTSEC,netdev=net0
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 39c8257..e28457a 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -207,7 +207,7 @@ static void ged_regs_write(void *opaque, hwaddr addr, uint64_t data,
return;
case ACPI_GED_REG_RESET:
if (data == ACPI_GED_RESET_VALUE) {
- qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
}
return;
}
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573..d47ebf0 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -65,24 +65,58 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
{
BlockBackend *blk = conf->blk;
BlockSizes blocksizes;
- int backend_ret;
+ BlockDriverState *bs;
+ bool use_blocksizes;
+ bool use_bs;
+
+ switch (conf->backend_defaults) {
+ case ON_OFF_AUTO_AUTO:
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
+ use_bs = false;
+ break;
+
+ case ON_OFF_AUTO_ON:
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
+ bs = blk_bs(blk);
+ use_bs = bs;
+ break;
+
+ case ON_OFF_AUTO_OFF:
+ use_blocksizes = false;
+ use_bs = false;
+ break;
+
+ default:
+ abort();
+ }
- backend_ret = blk_probe_blocksizes(blk, &blocksizes);
/* fill in detected values if they are not defined via qemu command line */
if (!conf->physical_block_size) {
- if (!backend_ret) {
+ if (use_blocksizes) {
conf->physical_block_size = blocksizes.phys;
} else {
conf->physical_block_size = BDRV_SECTOR_SIZE;
}
}
if (!conf->logical_block_size) {
- if (!backend_ret) {
+ if (use_blocksizes) {
conf->logical_block_size = blocksizes.log;
} else {
conf->logical_block_size = BDRV_SECTOR_SIZE;
}
}
+ if (use_bs) {
+ if (!conf->opt_io_size) {
+ conf->opt_io_size = bs->bl.opt_transfer;
+ }
+ if (conf->discard_granularity == -1) {
+ if (bs->bl.pdiscard_alignment) {
+ conf->discard_granularity = bs->bl.pdiscard_alignment;
+ } else if (bs->bl.request_alignment != 1) {
+ conf->discard_granularity = bs->bl.request_alignment;
+ }
+ }
+ }
if (conf->logical_block_size > conf->physical_block_size) {
error_setg(errp,
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd81893..252c3a7 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -198,6 +198,10 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
goto fail_guest_notifiers;
}
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
/* Set up virtqueue notify */
@@ -211,6 +215,10 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
while (j--) {
@@ -330,12 +338,20 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
aio_context_release(s->ctx);
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
for (i = 0; i < nvqs; i++) {
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
for (i = 0; i < nvqs; i++) {
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 984caf8..c9887d5 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2372,6 +2372,14 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp)
assert(!qemu_uuid_is_null(&vdev->instanceid));
+ if (!qemu_uuid_is_null(&vdc->instanceid)) {
+ /* Class wants to only have a single instance with a fixed UUID */
+ if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
+ error_setg(&err, "instance id can't be changed");
+ goto error_out;
+ }
+ }
+
/* Check for instance id collision for this class id */
QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
@@ -2438,18 +2446,22 @@ static void vmbus_dev_unrealize(DeviceState *dev)
free_channels(vdev);
}
+static Property vmbus_dev_props[] = {
+ DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+
static void vmbus_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *kdev = DEVICE_CLASS(klass);
+ device_class_set_props(kdev, vmbus_dev_props);
kdev->bus_type = TYPE_VMBUS;
kdev->realize = vmbus_dev_realize;
kdev->unrealize = vmbus_dev_unrealize;
kdev->reset = vmbus_dev_reset;
}
-static Property vmbus_dev_instanceid =
- DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
-
static void vmbus_dev_instance_init(Object *obj)
{
VMBusDevice *vdev = VMBUS_DEVICE(obj);
@@ -2458,8 +2470,6 @@ static void vmbus_dev_instance_init(Object *obj)
if (!qemu_uuid_is_null(&vdc->instanceid)) {
/* Class wants to only have a single instance with a fixed UUID */
vdev->instanceid = vdc->instanceid;
- } else {
- qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
}
}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 796ffc6..357437f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -435,11 +435,15 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16)));
if (bsel) {
- aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
+ /*
+ * Can't declare _SUN here for every device as it changes 'slot'
+ * enumeration order in linux kernel, so use another variable for it
+ */
+ aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
method = aml_method("_DSM", 4, AML_SERIALIZED);
aml_append(method, aml_return(
aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2),
- aml_arg(3), aml_name("BSEL"), aml_name("_SUN"))
+ aml_arg(3), aml_name("BSEL"), aml_name("ASUN"))
));
aml_append(dev, method);
}
@@ -466,6 +470,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
aml_append(method, aml_return(aml_int(s3d)));
aml_append(dev, method);
} else if (hotplug_enabled_dev) {
+ aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
/* add _EJ0 to make slot hotpluggable */
method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
aml_append(method,
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index bd7958b..16d20cd 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3234,6 +3234,7 @@ static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
}
hotplug_handler_plug(hotplug_ctrl, dev, &err);
}
+ pdev->partially_hotplugged = false;
out:
error_propagate(errp, err);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 2eb729d..0f37cf0 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -29,6 +29,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/log.h"
#include "hw/i386/pc.h"
#include "hw/pci-host/q35.h"
#include "hw/qdev-properties.h"
@@ -318,6 +319,8 @@ static void mch_update_pciexbar(MCHPCIState *mch)
addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
break;
case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD:
+ qemu_log_mask(LOG_GUEST_ERROR, "Q35: Reserved PCIEXBAR LENGTH\n");
+ return;
default:
abort();
}
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index 66e0b15..7fcafec 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -13,6 +13,7 @@ config PSERIES
select MSI_NONBROKEN
select FDT_PPC
select CHRP_NVRAM
+ select VOF
config SPAPR_RNG
bool
@@ -75,6 +76,7 @@ config PEGASOS2
select VT82C686
select IDE_VIA
select SMBUS_EEPROM
+ select VOF
# This should come with VT82C686
select ACPI_X86
@@ -144,3 +146,6 @@ config FW_CFG_PPC
config FDT_PPC
bool
+
+config VOF
+ bool
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 597d974..aa4c8e6 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -84,4 +84,7 @@ ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c'))
# Pegasos2
ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c'))
+ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c'))
+ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c'))
+
hw_arch += {'ppc': ppc_ss}
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 0bfd092..9a6ae86 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -1,7 +1,7 @@
/*
* QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator
*
- * Copyright (c) 2018-2020 BALATON Zoltan
+ * Copyright (c) 2018-2021 BALATON Zoltan
*
* This work is licensed under the GNU GPL license version 2 or later.
*
@@ -34,26 +34,68 @@
#include "trace.h"
#include "qemu/datadir.h"
#include "sysemu/device_tree.h"
+#include "hw/ppc/vof.h"
-#define PROM_FILENAME "pegasos2.rom"
+#include <libfdt.h>
+
+#define PROM_FILENAME "vof.bin"
#define PROM_ADDR 0xfff00000
#define PROM_SIZE 0x80000
+#define KVMPPC_HCALL_BASE 0xf000
+#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5)
+
+#define H_SUCCESS 0
+#define H_PRIVILEGE -3 /* Caller not privileged */
+#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */
+
#define BUS_FREQ_HZ 133333333
+#define PCI0_MEM_BASE 0xc0000000
+#define PCI0_MEM_SIZE 0x20000000
+#define PCI0_IO_BASE 0xf8000000
+#define PCI0_IO_SIZE 0x10000
+
+#define PCI1_MEM_BASE 0x80000000
+#define PCI1_MEM_SIZE 0x40000000
+#define PCI1_IO_BASE 0xfe000000
+#define PCI1_IO_SIZE 0x10000
+
+#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2")
+OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE)
+
+struct Pegasos2MachineState {
+ MachineState parent_obj;
+ PowerPCCPU *cpu;
+ DeviceState *mv;
+ Vof *vof;
+ void *fdt_blob;
+ uint64_t kernel_addr;
+ uint64_t kernel_entry;
+ uint64_t kernel_size;
+};
+
+static void *build_fdt(MachineState *machine, int *fdt_size);
+
static void pegasos2_cpu_reset(void *opaque)
{
PowerPCCPU *cpu = opaque;
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine);
cpu_reset(CPU(cpu));
cpu->env.spr[SPR_HID1] = 7ULL << 28;
+ if (pm->vof) {
+ cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20;
+ cpu->env.nip = 0x100;
+ }
}
static void pegasos2_init(MachineState *machine)
{
- PowerPCCPU *cpu = NULL;
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ CPUPPCState *env;
MemoryRegion *rom = g_new(MemoryRegion, 1);
- DeviceState *mv;
PCIBus *pci_bus;
PCIDevice *dev;
I2CBus *i2c_bus;
@@ -63,15 +105,16 @@ static void pegasos2_init(MachineState *machine)
uint8_t *spd_data;
/* init CPU */
- cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
- if (PPC_INPUT(&cpu->env) != PPC_FLAGS_INPUT_6xx) {
+ pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &pm->cpu->env;
+ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
error_report("Incompatible CPU, only 6xx bus supported");
exit(1);
}
/* Set time-base frequency */
- cpu_ppc_tb_init(&cpu->env, BUS_FREQ_HZ / 4);
- qemu_register_reset(pegasos2_cpu_reset, cpu);
+ cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4);
+ qemu_register_reset(pegasos2_cpu_reset, pm->cpu);
/* RAM */
memory_region_add_subregion(get_system_memory(), 0, machine->ram);
@@ -82,30 +125,36 @@ static void pegasos2_init(MachineState *machine)
error_report("Could not find firmware '%s'", fwname);
exit(1);
}
+ if (!machine->firmware && !pm->vof) {
+ pm->vof = g_malloc0(sizeof(*pm->vof));
+ }
memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, &error_fatal);
memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom);
sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1,
PPC_ELF_MACHINE, 0, 0);
if (sz <= 0) {
- sz = load_image_targphys(filename, PROM_ADDR, PROM_SIZE);
+ sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE);
}
if (sz <= 0 || sz > PROM_SIZE) {
error_report("Could not load firmware '%s'", filename);
exit(1);
}
g_free(filename);
+ if (pm->vof) {
+ pm->vof->fw_size = sz;
+ }
/* Marvell Discovery II system controller */
- mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
- ((qemu_irq *)cpu->env.irq_inputs)[PPC6xx_INPUT_INT]));
- pci_bus = mv64361_get_pci_bus(mv, 1);
+ pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]));
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
/* VIA VT8231 South Bridge (multifunction PCI device) */
/* VT8231 function 0: PCI-to-ISA Bridge */
dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true,
TYPE_VT8231_ISA);
qdev_connect_gpio_out(DEVICE(dev), 0,
- qdev_get_gpio_in_named(mv, "gpp", 31));
+ qdev_get_gpio_in_named(pm->mv, "gpp", 31));
/* VT8231 function 1: IDE Controller */
dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide");
@@ -127,18 +176,728 @@ static void pegasos2_init(MachineState *machine)
/* other PC hardware */
pci_vga_init(pci_bus);
+
+ if (machine->kernel_filename) {
+ sz = load_elf(machine->kernel_filename, NULL, NULL, NULL,
+ &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1,
+ PPC_ELF_MACHINE, 0, 0);
+ if (sz <= 0) {
+ error_report("Could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ pm->kernel_size = sz;
+ if (!pm->vof) {
+ warn_report("Option -kernel may be ineffective with -bios.");
+ }
+ }
+ if (machine->kernel_cmdline && !pm->vof) {
+ warn_report("Option -append may be ineffective with -bios.");
+ }
+}
+
+static uint32_t pegasos2_pci_config_read(AddressSpace *as, int bus,
+ uint32_t addr, uint32_t len)
+{
+ hwaddr pcicfg = (bus ? 0xf1000c78 : 0xf1000cf8);
+ uint32_t val = 0xffffffff;
+
+ stl_le_phys(as, pcicfg, addr | BIT(31));
+ switch (len) {
+ case 4:
+ val = ldl_le_phys(as, pcicfg + 4);
+ break;
+ case 2:
+ val = lduw_le_phys(as, pcicfg + 4);
+ break;
+ case 1:
+ val = ldub_phys(as, pcicfg + 4);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length\n", __func__);
+ break;
+ }
+ return val;
+}
+
+static void pegasos2_pci_config_write(AddressSpace *as, int bus, uint32_t addr,
+ uint32_t len, uint32_t val)
+{
+ hwaddr pcicfg = (bus ? 0xf1000c78 : 0xf1000cf8);
+
+ stl_le_phys(as, pcicfg, addr | BIT(31));
+ switch (len) {
+ case 4:
+ stl_le_phys(as, pcicfg + 4, val);
+ break;
+ case 2:
+ stw_le_phys(as, pcicfg + 4, val);
+ break;
+ case 1:
+ stb_phys(as, pcicfg + 4, val);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length\n", __func__);
+ break;
+ }
+}
+
+static void pegasos2_machine_reset(MachineState *machine)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ AddressSpace *as = CPU(pm->cpu)->as;
+ void *fdt;
+ uint64_t d[2];
+ int sz;
+
+ qemu_devices_reset();
+ if (!pm->vof) {
+ return; /* Firmware should set up machine so nothing to do */
+ }
+
+ /* Otherwise, set up devices that board firmware would normally do */
+ stl_le_phys(as, 0xf1000000, 0x28020ff);
+ stl_le_phys(as, 0xf1000278, 0xa31fc);
+ stl_le_phys(as, 0xf100f300, 0x11ff0400);
+ stl_le_phys(as, 0xf100f10c, 0x80000000);
+ stl_le_phys(as, 0xf100001c, 0x8000000);
+ pegasos2_pci_config_write(as, 0, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ pegasos2_pci_config_write(as, 1, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 0) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x50, 1, 0x2);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x109);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_CLASS_PROG, 1, 0xf);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x40, 1, 0xb);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x50, 4, 0x17171717);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_COMMAND, 2, 0x87);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 2) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 3) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x48, 4, 0xf00);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x40, 4, 0x558020);
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x90, 4, 0xd00);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 5) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ pegasos2_pci_config_write(as, 1, (PCI_DEVFN(12, 6) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ /* Device tree and VOF set up */
+ vof_init(pm->vof, machine->ram_size, &error_fatal);
+ if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) {
+ error_report("Memory allocation for stack failed");
+ exit(1);
+ }
+ if (pm->kernel_size &&
+ vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) {
+ error_report("Memory for kernel is in use");
+ exit(1);
+ }
+ fdt = build_fdt(machine, &sz);
+ /* FIXME: VOF assumes entry is same as load address */
+ d[0] = cpu_to_be64(pm->kernel_entry);
+ d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr));
+ qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d));
+
+ qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+ g_free(pm->fdt_blob);
+ pm->fdt_blob = fdt;
+
+ vof_build_dt(fdt, pm->vof);
+ vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe");
+ pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
}
-static void pegasos2_machine(MachineClass *mc)
+enum pegasos2_rtas_tokens {
+ RTAS_RESTART_RTAS = 0,
+ RTAS_NVRAM_FETCH = 1,
+ RTAS_NVRAM_STORE = 2,
+ RTAS_GET_TIME_OF_DAY = 3,
+ RTAS_SET_TIME_OF_DAY = 4,
+ RTAS_EVENT_SCAN = 6,
+ RTAS_CHECK_EXCEPTION = 7,
+ RTAS_READ_PCI_CONFIG = 8,
+ RTAS_WRITE_PCI_CONFIG = 9,
+ RTAS_DISPLAY_CHARACTER = 10,
+ RTAS_SET_INDICATOR = 11,
+ RTAS_POWER_OFF = 17,
+ RTAS_SUSPEND = 18,
+ RTAS_HIBERNATE = 19,
+ RTAS_SYSTEM_REBOOT = 20,
+};
+
+static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm,
+ target_ulong args_real)
{
+ AddressSpace *as = CPU(cpu)->as;
+ uint32_t token = ldl_be_phys(as, args_real);
+ uint32_t nargs = ldl_be_phys(as, args_real + 4);
+ uint32_t nrets = ldl_be_phys(as, args_real + 8);
+ uint32_t args = args_real + 12;
+ uint32_t rets = args_real + 12 + nargs * 4;
+
+ if (nrets < 1) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n");
+ return H_PARAMETER;
+ }
+ switch (token) {
+ case RTAS_READ_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 2 || nrets != 2) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ val = pegasos2_pci_config_read(as, !(addr >> 24),
+ addr & 0x0fffffff, len);
+ stl_be_phys(as, rets, 0);
+ stl_be_phys(as, rets + 4, val);
+ return H_SUCCESS;
+ }
+ case RTAS_WRITE_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 3 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ val = ldl_be_phys(as, args + 8);
+ pegasos2_pci_config_write(as, !(addr >> 24),
+ addr & 0x0fffffff, len, val);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+ case RTAS_DISPLAY_CHARACTER:
+ if (nargs != 1 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args));
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ default:
+ qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n",
+ token, nargs, nrets);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+}
+
+static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp);
+ CPUPPCState *env = &cpu->env;
+
+ /* The TCG path should also be holding the BQL at this point */
+ g_assert(qemu_mutex_iothread_locked());
+
+ if (msr_pr) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n");
+ env->gpr[3] = H_PRIVILEGE;
+ } else if (env->gpr[3] == KVMPPC_H_RTAS) {
+ env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]);
+ } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) {
+ int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob,
+ env->gpr[4]);
+ env->gpr[3] = (ret ? H_PARAMETER : H_SUCCESS);
+ } else {
+ qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx
+ "\n", env->gpr[3]);
+ env->gpr[3] = -1;
+ }
+}
+
+static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+}
+
+static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+ return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1];
+}
+
+static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+
mc->desc = "Genesi/bPlan Pegasos II";
mc->init = pegasos2_init;
+ mc->reset = pegasos2_machine_reset;
mc->block_default_type = IF_IDE;
mc->default_boot_order = "cd";
mc->default_display = "std";
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
mc->default_ram_id = "pegasos2.ram";
mc->default_ram_size = 512 * MiB;
+
+ vhc->hypercall = pegasos2_hypercall;
+ vhc->cpu_exec_enter = vhyp_nop;
+ vhc->cpu_exec_exit = vhyp_nop;
+ vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr;
+}
+
+static const TypeInfo pegasos2_machine_info = {
+ .name = TYPE_PEGASOS2_MACHINE,
+ .parent = TYPE_MACHINE,
+ .class_init = pegasos2_machine_class_init,
+ .instance_size = sizeof(Pegasos2MachineState),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { }
+ },
+};
+
+static void pegasos2_machine_register_types(void)
+{
+ type_register_static(&pegasos2_machine_info);
+}
+
+type_init(pegasos2_machine_register_types)
+
+/* FDT creation for passing to firmware */
+
+typedef struct {
+ void *fdt;
+ const char *path;
+} FDTInfo;
+
+/* We do everything in reverse order so it comes out right in the tree */
+
+static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi");
}
-DEFINE_MACHINE("pegasos2", pegasos2_machine)
+static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb");
+}
+
+static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ GString *name = g_string_sized_new(64);
+ uint32_t cells[3];
+
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa");
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa");
+
+ /* addional devices */
+ g_string_printf(name, "%s/lpt@i3bc", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(7);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3bc);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt");
+
+ g_string_printf(name, "%s/fdc@i3f0", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(6);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3f0);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc");
+
+ g_string_printf(name, "%s/timer@i40", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x40);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer");
+
+ g_string_printf(name, "%s/rtc@i70", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(8);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x70);
+ cells[2] = cpu_to_be32(2);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc");
+
+ g_string_printf(name, "%s/keyboard@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard");
+
+ g_string_printf(name, "%s/8042@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", "");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042");
+
+ g_string_printf(name, "%s/serial@i2f8", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(3);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x2f8);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial");
+
+ g_string_free(name, TRUE);
+}
+
+static struct {
+ const char *id;
+ const char *name;
+ void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi);
+} device_map[] = {
+ { "pci11ab,6460", "host", NULL },
+ { "pci1106,8231", "isa", dt_isa },
+ { "pci1106,571", "ide", dt_ide },
+ { "pci1106,3044", "firewire", NULL },
+ { "pci1106,3038", "usb", dt_usb },
+ { "pci1106,8235", "other", NULL },
+ { "pci1106,3058", "sound", NULL },
+ { NULL, NULL }
+};
+
+static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque)
+{
+ FDTInfo *fi = opaque;
+ GString *node = g_string_new(NULL);
+ uint32_t cells[(PCI_NUM_REGIONS + 1) * 5];
+ int i, j;
+ const char *name = NULL;
+ g_autofree const gchar *pn = g_strdup_printf("pci%x,%x",
+ pci_get_word(&d->config[PCI_VENDOR_ID]),
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+
+ for (i = 0; device_map[i].id; i++) {
+ if (!strcmp(pn, device_map[i].id)) {
+ name = device_map[i].name;
+ break;
+ }
+ }
+ g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn),
+ PCI_SLOT(d->devfn));
+ if (PCI_FUNC(d->devfn)) {
+ g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn));
+ }
+
+ qemu_fdt_add_subnode(fi->fdt, node->str);
+ if (device_map[i].dtf) {
+ FDTInfo cfi = { fi->fdt, node->str };
+ device_map[i].dtf(bus, d, &cfi);
+ }
+ cells[0] = cpu_to_be32(d->devfn << 8);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = 0;
+ cells[4] = 0;
+ j = 5;
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ if (!d->io_regions[i].size) {
+ continue;
+ }
+ cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4));
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
+ cells[j] |= cpu_to_be32(1 << 24);
+ } else {
+ cells[j] |= cpu_to_be32(2 << 24);
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+ cells[j] |= cpu_to_be32(4 << 28);
+ }
+ }
+ cells[j + 1] = 0;
+ cells[j + 2] = 0;
+ cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32);
+ cells[j + 4] = cpu_to_be32(d->io_regions[i].size);
+ j += 5;
+ }
+ qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn);
+ if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) {
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts",
+ pci_get_byte(&d->config[PCI_INTERRUPT_PIN]));
+ }
+ /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID]));
+ cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id",
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id",
+ pci_get_word(&d->config[PCI_VENDOR_ID]));
+
+ g_string_free(node, TRUE);
+}
+
+static void *build_fdt(MachineState *machine, int *fdt_size)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ PowerPCCPU *cpu = pm->cpu;
+ PCIBus *pci_bus;
+ FDTInfo fi;
+ uint32_t cells[16];
+ void *fdt = create_device_tree(fdt_size);
+
+ fi.fdt = fdt;
+
+ /* root node */
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description",
+ "Pegasos CHRP PowerPC System");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH");
+ qemu_fdt_setprop_string(fdt, "/", "revision", "2B");
+ qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp");
+ qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1);
+ qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2");
+
+ /* pci@c0000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@c0000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1);
+ cells[0] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI0_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI0_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci");
+
+ fi.path = "/pci@c0000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 0);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ /* pci@80000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@80000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0);
+ cells[0] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge",
+ 0xf1000cb4);
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI1_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI1_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci");
+
+ fi.path = "/pci@80000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ qemu_fdt_add_subnode(fdt, "/failsafe");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe");
+
+ qemu_fdt_add_subnode(fdt, "/rtas");
+ qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "display-character",
+ RTAS_DISPLAY_CHARACTER);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config",
+ RTAS_WRITE_PCI_CONFIG);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config",
+ RTAS_READ_PCI_CONFIG);
+ /* Pegasos2 firmware misspells check-exception and guests use that */
+ qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption",
+ RTAS_CHECK_EXCEPTION);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day",
+ RTAS_SET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day",
+ RTAS_GET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1);
+
+ /* cpus */
+ qemu_fdt_add_subnode(fdt, "/cpus");
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0);
+ qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus");
+
+ /* FIXME Get CPU name from CPU object */
+ const char *cp = "/cpus/PowerPC,G4";
+ qemu_fdt_add_subnode(fdt, cp);
+ qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size",
+ cpu->env.icache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size",
+ cpu->env.icache_line_size);
+ if (cpu->env.id_tlbs) {
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_string(fdt, cp, "tlb-split", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb);
+ qemu_fdt_setprop_string(fdt, cp, "state", "running");
+ if (cpu->env.insns_flags & PPC_ALTIVEC) {
+ qemu_fdt_setprop_string(fdt, cp, "altivec", "");
+ qemu_fdt_setprop_string(fdt, cp, "data-streams", "");
+ }
+ /*
+ * FIXME What flags do data-streams, external-control and
+ * performance-monitor depend on?
+ */
+ qemu_fdt_setprop_string(fdt, cp, "external-control", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) {
+ qemu_fdt_setprop_string(fdt, cp, "general-purpose", "");
+ }
+ qemu_fdt_setprop_string(fdt, cp, "performance-monitor", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FRES) {
+ qemu_fdt_setprop_string(fdt, cp, "graphics", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4);
+ qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency",
+ cpu->env.tb_env->tb_freq);
+ qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ);
+ qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5);
+ qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]);
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu");
+ qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1);
+
+ /* memory */
+ qemu_fdt_add_subnode(fdt, "/memory@0");
+ cells[0] = 0;
+ cells[1] = cpu_to_be32(machine->ram_size);
+ qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory");
+ qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory");
+
+ qemu_fdt_add_subnode(fdt, "/chosen");
+ qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
+ machine->kernel_cmdline ?: "");
+ qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen");
+
+ qemu_fdt_add_subnode(fdt, "/openprom");
+ qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1");
+
+ return fdt;
+}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4dd90b7..a007be4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -101,6 +101,7 @@
#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
+#define FW_FILE_NAME_VOF "vof.bin"
#define FW_OVERHEAD 0x2800000
#define KERNEL_LOAD_ADDR FW_MAX_SIZE
@@ -880,6 +881,10 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
add_str(hypertas, "hcall-copy");
add_str(hypertas, "hcall-debug");
add_str(hypertas, "hcall-vphn");
+ if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) {
+ add_str(hypertas, "hcall-rpt-invalidate");
+ }
+
add_str(qemu_hypertas, "hcall-memop1");
if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
@@ -919,9 +924,13 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
*
* The extra 8 bytes is required because Linux's FWNMI error log check
* is off-by-one.
+ *
+ * RTAS_MIN_SIZE is required for the RTAS blob itself.
*/
- _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
- ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
+ _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE +
+ RTAS_ERROR_LOG_MAX +
+ ms->smp.max_cpus * sizeof(uint64_t) * 2 +
+ sizeof(uint64_t)));
_FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
RTAS_ERROR_LOG_MAX));
_FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
@@ -1639,22 +1648,29 @@ static void spapr_machine_reset(MachineState *machine)
fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE;
fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE);
+ if (spapr->vof) {
+ spapr_vof_reset(spapr, fdt, &error_fatal);
+ /*
+ * Do not pack the FDT as the client may change properties.
+ * VOF client does not expect the FDT so we do not load it to the VM.
+ */
+ } else {
+ rc = fdt_pack(fdt);
+ /* Should only fail if we've built a corrupted tree */
+ assert(rc == 0);
- rc = fdt_pack(fdt);
-
- /* Should only fail if we've built a corrupted tree */
- assert(rc == 0);
-
- /* Load the fdt */
+ spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT,
+ 0, fdt_addr, 0);
+ cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+ }
qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
- cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+
g_free(spapr->fdt_blob);
spapr->fdt_size = fdt_totalsize(fdt);
spapr->fdt_initial_size = spapr->fdt_size;
spapr->fdt_blob = fdt;
/* Set up the entry state */
- spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0);
first_ppc_cpu->env.gpr[5] = 0;
spapr->fwnmi_system_reset_addr = -1;
@@ -2018,6 +2034,7 @@ static const VMStateDescription vmstate_spapr = {
&vmstate_spapr_cap_ccf_assist,
&vmstate_spapr_cap_fwnmi,
&vmstate_spapr_fwnmi,
+ &vmstate_spapr_cap_rpt_invalidate,
NULL
}
};
@@ -2657,7 +2674,8 @@ static void spapr_machine_init(MachineState *machine)
SpaprMachineState *spapr = SPAPR_MACHINE(machine);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
MachineClass *mc = MACHINE_GET_CLASS(machine);
- const char *bios_name = machine->firmware ?: FW_FILE_NAME;
+ const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME;
+ const char *bios_name = machine->firmware ?: bios_default;
const char *kernel_filename = machine->kernel_filename;
const char *initrd_filename = machine->initrd_filename;
PCIHostState *phb;
@@ -3014,6 +3032,10 @@ static void spapr_machine_init(MachineState *machine)
}
qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);
+ if (spapr->vof) {
+ spapr->vof->fw_size = fw_size; /* for claim() on itself */
+ spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
+ }
}
#define DEFAULT_KVM_TYPE "auto"
@@ -3204,6 +3226,28 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
}
}
+static bool spapr_get_vof(Object *obj, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ return spapr->vof != NULL;
+}
+
+static void spapr_set_vof(Object *obj, bool value, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ if (spapr->vof) {
+ vof_cleanup(spapr->vof);
+ g_free(spapr->vof);
+ spapr->vof = NULL;
+ }
+ if (!value) {
+ return;
+ }
+ spapr->vof = g_malloc0(sizeof(*spapr->vof));
+}
+
static char *spapr_get_ic_mode(Object *obj, Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3329,6 +3373,11 @@ static void spapr_instance_init(Object *obj)
stringify(KERNEL_LOAD_ADDR)
" for -kernel is the default");
spapr->kernel_addr = KERNEL_LOAD_ADDR;
+
+ object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof);
+ object_property_set_description(obj, "x-vof",
+ "Enable Virtual Open Firmware (experimental)");
+
/* The machine class defines the default interrupt controller mode */
spapr->irq = smc->irq;
object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
@@ -4492,6 +4541,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+ VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
mc->desc = "pSeries Logical Partition (PAPR compliant)";
mc->ignore_boot_device_suffixes = true;
@@ -4573,6 +4623,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF;
spapr_caps_add_properties(smc);
smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
@@ -4580,6 +4631,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->smp_threads_vsmt = true;
smc->nr_xirqs = SPAPR_NR_XIRQS;
xfc->match_nvt = spapr_match_nvt;
+ vmc->client_architecture_support = spapr_vof_client_architecture_support;
+ vmc->quiesce = spapr_vof_quiesce;
+ vmc->setprop = spapr_vof_setprop;
}
static const TypeInfo spapr_machine_info = {
@@ -4599,6 +4653,7 @@ static const TypeInfo spapr_machine_info = {
{ TYPE_XICS_FABRIC },
{ TYPE_INTERRUPT_STATS_PROVIDER },
{ TYPE_XIVE_FABRIC },
+ { TYPE_VOF_MACHINE_IF },
{ }
},
};
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index d0c419b..ed7c077 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -582,6 +582,37 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
}
}
+static void cap_rpt_invalidate_apply(SpaprMachineState *spapr,
+ uint8_t val, Error **errp)
+{
+ ERRP_GUARD();
+
+ if (!val) {
+ /* capability disabled by default */
+ return;
+ }
+
+ if (tcg_enabled()) {
+ error_setg(errp, "No H_RPT_INVALIDATE support in TCG");
+ error_append_hint(errp,
+ "Try appending -machine cap-rpt-invalidate=off\n");
+ } else if (kvm_enabled()) {
+ if (!kvmppc_has_cap_mmu_radix()) {
+ error_setg(errp, "H_RPT_INVALIDATE only supported on Radix");
+ return;
+ }
+
+ if (!kvmppc_has_cap_rpt_invalidate()) {
+ error_setg(errp,
+ "KVM implementation does not support H_RPT_INVALIDATE");
+ error_append_hint(errp,
+ "Try appending -machine cap-rpt-invalidate=off\n");
+ } else {
+ kvmppc_enable_h_rpt_invalidate();
+ }
+ }
+}
+
SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
[SPAPR_CAP_HTM] = {
.name = "htm",
@@ -690,6 +721,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
.type = "bool",
.apply = cap_fwnmi_apply,
},
+ [SPAPR_CAP_RPT_INVALIDATE] = {
+ .name = "rpt-invalidate",
+ .description = "Allow H_RPT_INVALIDATE",
+ .index = SPAPR_CAP_RPT_INVALIDATE,
+ .get = spapr_cap_get_bool,
+ .set = spapr_cap_set_bool,
+ .type = "bool",
+ .apply = cap_rpt_invalidate_apply,
+ },
};
static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -830,6 +870,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
+SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE);
void spapr_caps_init(SpaprMachineState *spapr)
{
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f25014a..0e9a5b2 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1233,8 +1233,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
spapr_setup_hpt(spapr);
}
- fdt = spapr_build_fdt(spapr, false, fdt_bufsize);
-
+ fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize);
g_free(spapr->fdt_blob);
spapr->fdt_size = fdt_totalsize(fdt);
spapr->fdt_initial_size = spapr->fdt_size;
@@ -1277,6 +1276,25 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
return ret;
}
+target_ulong spapr_vof_client_architecture_support(MachineState *ms,
+ CPUState *cs,
+ target_ulong ovec_addr)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+
+ target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr,
+ ovec_addr, FDT_MAX_SIZE);
+
+ /*
+ * This adds stdout and generates phandles for boottime and CAS FDTs.
+ * It is alright to update the FDT here as do_client_architecture_support()
+ * does not pack it.
+ */
+ spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob);
+
+ return ret;
+}
+
static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
@@ -1299,6 +1317,8 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
break;
case SPAPR_CAP_FIXED:
+ behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY;
+ behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS;
break;
default: /* broken */
assert(safe_cache == SPAPR_CAP_BROKEN);
diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c
new file mode 100644
index 0000000..40ce8fe
--- /dev/null
+++ b/hw/ppc/spapr_vof.c
@@ -0,0 +1,167 @@
+/*
+ * SPAPR machine hooks to Virtual Open Firmware,
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/vof.h"
+#include "sysemu/sysemu.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *_args)
+{
+ int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob,
+ ppc64_phys_to_real(_args[0]));
+
+ if (ret) {
+ return H_PARAMETER;
+ }
+ return H_SUCCESS;
+}
+
+void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt)
+{
+ char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+
+ vof_build_dt(fdt, spapr->vof);
+
+ if (spapr->vof->bootargs) {
+ int chosen;
+
+ _FDT(chosen = fdt_path_offset(fdt, "/chosen"));
+ /*
+ * If the client did not change "bootargs", spapr_dt_chosen() must have
+ * stored machine->kernel_cmdline in it before getting here.
+ */
+ _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs));
+ }
+
+ /*
+ * SLOF-less setup requires an open instance of stdout for early
+ * kernel printk. By now all phandles are settled so we can open
+ * the default serial console.
+ */
+ if (stdout_path) {
+ _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout",
+ stdout_path));
+ }
+}
+
+void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp)
+{
+ target_ulong stack_ptr;
+ Vof *vof = spapr->vof;
+ PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
+ vof_init(vof, spapr->rma_size, errp);
+
+ stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE);
+ if (stack_ptr == -1) {
+ error_setg(errp, "Memory allocation for stack failed");
+ return;
+ }
+ /* Stack grows downwards plus reserve space for the minimum stack frame */
+ stack_ptr += VOF_STACK_SIZE - 0x20;
+
+ if (spapr->kernel_size &&
+ vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) {
+ error_setg(errp, "Memory for kernel is in use");
+ return;
+ }
+
+ if (spapr->initrd_size &&
+ vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) {
+ error_setg(errp, "Memory for initramdisk is in use");
+ return;
+ }
+
+ spapr_vof_client_dt_finalize(spapr, fdt);
+
+ spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT,
+ stack_ptr, spapr->initrd_base,
+ spapr->initrd_size);
+ /* VOF is 32bit BE so enforce MSR here */
+ first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE));
+
+ /*
+ * At this point the expected allocation map is:
+ *
+ * 0..c38 - the initial firmware
+ * 8000..10000 - stack
+ * 400000.. - kernel
+ * 3ea0000.. - initramdisk
+ *
+ * We skip writing FDT as nothing expects it; OF client interface is
+ * going to be used for reading the device tree.
+ */
+}
+
+void spapr_vof_quiesce(MachineState *ms)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+
+ spapr->fdt_size = fdt_totalsize(spapr->fdt_blob);
+ spapr->fdt_initial_size = spapr->fdt_size;
+}
+
+bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname,
+ void *val, int vallen)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+
+ /*
+ * We only allow changing properties which we know how to update in QEMU
+ * OR
+ * the ones which we know that they need to survive during "quiesce".
+ */
+
+ if (strcmp(path, "/rtas") == 0) {
+ if (strcmp(propname, "linux,rtas-base") == 0 ||
+ strcmp(propname, "linux,rtas-entry") == 0) {
+ /* These need to survive quiesce so let them store in the FDT */
+ return true;
+ }
+ }
+
+ if (strcmp(path, "/chosen") == 0) {
+ if (strcmp(propname, "bootargs") == 0) {
+ Vof *vof = spapr->vof;
+
+ g_free(vof->bootargs);
+ vof->bootargs = g_strndup(val, vallen);
+ return true;
+ }
+ if (strcmp(propname, "linux,initrd-start") == 0) {
+ if (vallen == sizeof(uint32_t)) {
+ spapr->initrd_base = ldl_be_p(val);
+ return true;
+ }
+ if (vallen == sizeof(uint64_t)) {
+ spapr->initrd_base = ldq_be_p(val);
+ return true;
+ }
+ return false;
+ }
+ if (strcmp(propname, "linux,initrd-end") == 0) {
+ if (vallen == sizeof(uint32_t)) {
+ spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base;
+ return true;
+ }
+ if (vallen == sizeof(uint64_t)) {
+ spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base;
+ return true;
+ }
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index 0ba3e40..6e90a01 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -71,6 +71,30 @@ spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx3
spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64
spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed"
+# vof.c
+vof_error_str_truncated(const char *s, int len) "%s truncated to %d"
+vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d"
+vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d"
+vof_error_unknown_method(const char *method) "\"%s\""
+vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x"
+vof_error_unknown_path(const char *path) "\"%s\""
+vof_error_write(uint32_t ih) "ih=0x%x"
+vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x"
+vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x"
+vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x"
+vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x"
+vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]"
+vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d"
+vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d"
+vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x"
+vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x"
+vof_package_to_path(uint32_t ph, const char *tmp, uint32_t ret) "ph=0x%x => %s len=%d"
+vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, uint32_t ret) "ih=0x%x ph=0x%x => %s len=%d"
+vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x"
+vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\""
+vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+
# ppc.c
ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
new file mode 100644
index 0000000..81f6596
--- /dev/null
+++ b/hw/ppc/vof.c
@@ -0,0 +1,1053 @@
+/*
+ * QEMU PowerPC Virtual Open Firmware.
+ *
+ * This implements client interface from OpenFirmware IEEE1275 on the QEMU
+ * side to leave only a very basic firmware in the VM.
+ *
+ * Copyright (c) 2021 IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "hw/ppc/vof.h"
+#include "hw/ppc/fdt.h"
+#include "sysemu/runstate.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+/*
+ * OF 1275 "nextprop" description suggests is it 32 bytes max but
+ * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long.
+ */
+#define OF_PROPNAME_LEN_MAX 64
+
+#define VOF_MAX_PATH 256
+#define VOF_MAX_SETPROPLEN 2048
+#define VOF_MAX_METHODLEN 256
+#define VOF_MAX_FORTHCODE 256
+#define VOF_VTY_BUF_SIZE 256
+
+typedef struct {
+ uint64_t start;
+ uint64_t size;
+} OfClaimed;
+
+typedef struct {
+ char *path; /* the path used to open the instance */
+ uint32_t phandle;
+} OfInstance;
+
+static int readstr(hwaddr pa, char *buf, int size)
+{
+ if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) {
+ return -1;
+ }
+ if (strnlen(buf, size) == size) {
+ buf[size - 1] = '\0';
+ trace_vof_error_str_truncated(buf, size);
+ return -1;
+ }
+ return 0;
+}
+
+static bool cmpservice(const char *s, unsigned nargs, unsigned nret,
+ const char *s1, unsigned nargscheck, unsigned nretcheck)
+{
+ if (strcmp(s, s1)) {
+ return false;
+ }
+ if ((nargscheck && (nargs != nargscheck)) ||
+ (nretcheck && (nret != nretcheck))) {
+ trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret);
+ return false;
+ }
+
+ return true;
+}
+
+static void prop_format(char *tval, int tlen, const void *prop, int len)
+{
+ int i;
+ const unsigned char *c;
+ char *t;
+ const char bin[] = "...";
+
+ for (i = 0, c = prop; i < len; ++i, ++c) {
+ if (*c == '\0' && i == len - 1) {
+ strncpy(tval, prop, tlen - 1);
+ return;
+ }
+ if (*c < 0x20 || *c >= 0x80) {
+ break;
+ }
+ }
+
+ for (i = 0, c = prop, t = tval; i < len; ++i, ++c) {
+ if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) {
+ strcpy(t, bin);
+ return;
+ }
+ if (i && i % 4 == 0 && i != len - 1) {
+ strcat(t, " ");
+ ++t;
+ }
+ t += sprintf(t, "%02X", *c & 0xFF);
+ }
+}
+
+static int get_path(const void *fdt, int offset, char *buf, int len)
+{
+ int ret;
+
+ ret = fdt_get_path(fdt, offset, buf, len - 1);
+ if (ret < 0) {
+ return ret;
+ }
+
+ buf[len - 1] = '\0';
+
+ return strlen(buf) + 1;
+}
+
+static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len)
+{
+ int ret;
+
+ ret = fdt_node_offset_by_phandle(fdt, ph);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return get_path(fdt, ret, buf, len);
+}
+
+static int path_offset(const void *fdt, const char *path)
+{
+ g_autofree char *p = NULL;
+ char *at;
+
+ /*
+ * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16
+ *
+ * "Conversion from numeric representation to text representation shall use
+ * the lower case forms of the hexadecimal digits in the range a..f,
+ * suppressing leading zeros".
+ */
+ p = g_strdup(path);
+ for (at = strchr(p, '@'); at && *at; ) {
+ if (*at == '/') {
+ at = strchr(at, '@');
+ } else {
+ *at = tolower(*at);
+ ++at;
+ }
+ }
+
+ return fdt_path_offset(fdt, p);
+}
+
+static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr)
+{
+ char fullnode[VOF_MAX_PATH];
+ uint32_t ret = -1;
+ int offset;
+
+ if (readstr(nodeaddr, fullnode, sizeof(fullnode))) {
+ return (uint32_t) ret;
+ }
+
+ offset = path_offset(fdt, fullnode);
+ if (offset >= 0) {
+ ret = fdt_get_phandle(fdt, offset);
+ }
+ trace_vof_finddevice(fullnode, ret);
+ return (uint32_t) ret;
+}
+
+static const void *getprop(const void *fdt, int nodeoff, const char *propname,
+ int *proplen, bool *write0)
+{
+ const char *unit, *prop;
+ const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen);
+
+ if (ret) {
+ if (write0) {
+ *write0 = false;
+ }
+ return ret;
+ }
+
+ if (strcmp(propname, "name")) {
+ return NULL;
+ }
+ /*
+ * We return a value for "name" from path if queried but property does not
+ * exist. @proplen does not include the unit part in this case.
+ */
+ prop = fdt_get_name(fdt, nodeoff, proplen);
+ if (!prop) {
+ *proplen = 0;
+ return NULL;
+ }
+
+ unit = memchr(prop, '@', *proplen);
+ if (unit) {
+ *proplen = unit - prop;
+ }
+ *proplen += 1;
+
+ /*
+ * Since it might be cut at "@" and there will be no trailing zero
+ * in the prop buffer, tell the caller to write zero at the end.
+ */
+ if (write0) {
+ *write0 = true;
+ }
+ return prop;
+}
+
+static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname,
+ uint32_t valaddr, uint32_t vallen)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = 0;
+ int proplen = 0;
+ const void *prop;
+ char trval[64] = "";
+ int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph);
+ bool write0;
+
+ if (nodeoff < 0) {
+ return -1;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ return -1;
+ }
+ prop = getprop(fdt, nodeoff, propname, &proplen, &write0);
+ if (prop) {
+ const char zero = 0;
+ int cb = MIN(proplen, vallen);
+
+ if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK ||
+ /* if that was "name" with a unit address, overwrite '@' with '0' */
+ (write0 &&
+ cb == proplen &&
+ VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) {
+ ret = -1;
+ } else {
+ /*
+ * OF1275 says:
+ * "Size is either the actual size of the property, or -1 if name
+ * does not exist", hence returning proplen instead of cb.
+ */
+ ret = proplen;
+ /* Do not format a value if tracepoint is silent, for performance */
+ if (trace_event_get_state(TRACE_VOF_GETPROP) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ prop_format(trval, sizeof(trval), prop, ret);
+ }
+ }
+ } else {
+ ret = -1;
+ }
+ trace_vof_getprop(nodeph, propname, ret, trval);
+
+ return ret;
+}
+
+static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = 0;
+ int proplen = 0;
+ const void *prop;
+ int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph);
+
+ if (nodeoff < 0) {
+ return -1;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ return -1;
+ }
+ prop = getprop(fdt, nodeoff, propname, &proplen, NULL);
+ if (prop) {
+ ret = proplen;
+ } else {
+ ret = -1;
+ }
+ trace_vof_getproplen(nodeph, propname, ret);
+
+ return ret;
+}
+
+static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof,
+ uint32_t nodeph, uint32_t pname,
+ uint32_t valaddr, uint32_t vallen)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = -1;
+ int offset;
+ char trval[64] = "";
+ char nodepath[VOF_MAX_PATH] = "";
+ Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+ VofMachineIfClass *vmc;
+ g_autofree char *val = NULL;
+
+ if (vallen > VOF_MAX_SETPROPLEN) {
+ goto trace_exit;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ goto trace_exit;
+ }
+ offset = fdt_node_offset_by_phandle(fdt, nodeph);
+ if (offset < 0) {
+ goto trace_exit;
+ }
+ ret = get_path(fdt, offset, nodepath, sizeof(nodepath));
+ if (ret <= 0) {
+ goto trace_exit;
+ }
+
+ val = g_malloc0(vallen);
+ if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) {
+ goto trace_exit;
+ }
+
+ if (!vmo) {
+ goto trace_exit;
+ }
+
+ vmc = VOF_MACHINE_GET_CLASS(vmo);
+ if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) {
+ goto trace_exit;
+ }
+
+ ret = fdt_setprop(fdt, offset, propname, val, vallen);
+ if (ret) {
+ goto trace_exit;
+ }
+
+ if (trace_event_get_state(TRACE_VOF_SETPROP) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ prop_format(trval, sizeof(trval), val, vallen);
+ }
+ ret = vallen;
+
+trace_exit:
+ trace_vof_setprop(nodeph, propname, trval, vallen, ret);
+
+ return ret;
+}
+
+static uint32_t vof_nextprop(const void *fdt, uint32_t phandle,
+ uint32_t prevaddr, uint32_t nameaddr)
+{
+ int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle);
+ char prev[OF_PROPNAME_LEN_MAX + 1];
+ const char *tmp;
+
+ if (readstr(prevaddr, prev, sizeof(prev))) {
+ return -1;
+ }
+
+ fdt_for_each_property_offset(offset, fdt, nodeoff) {
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+ if (prev[0] == '\0' || strcmp(prev, tmp) == 0) {
+ if (prev[0] != '\0') {
+ offset = fdt_next_property_offset(fdt, offset);
+ if (offset < 0) {
+ return 0;
+ }
+ }
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+
+ if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) {
+ return -1;
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t vof_peer(const void *fdt, uint32_t phandle)
+{
+ int ret;
+
+ if (phandle == 0) {
+ ret = fdt_path_offset(fdt, "/");
+ } else {
+ ret = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle));
+ }
+
+ if (ret < 0) {
+ ret = 0;
+ } else {
+ ret = fdt_get_phandle(fdt, ret);
+ }
+
+ return ret;
+}
+
+static uint32_t vof_child(const void *fdt, uint32_t phandle)
+{
+ int ret = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle));
+
+ if (ret < 0) {
+ ret = 0;
+ } else {
+ ret = fdt_get_phandle(fdt, ret);
+ }
+
+ return ret;
+}
+
+static uint32_t vof_parent(const void *fdt, uint32_t phandle)
+{
+ int ret = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle));
+
+ if (ret < 0) {
+ ret = 0;
+ } else {
+ ret = fdt_get_phandle(fdt, ret);
+ }
+
+ return ret;
+}
+
+static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path)
+{
+ uint32_t ret = -1;
+ OfInstance *inst = NULL;
+
+ if (vof->of_instance_last == 0xFFFFFFFF) {
+ /* We do not recycle ihandles yet */
+ goto trace_exit;
+ }
+
+ inst = g_new0(OfInstance, 1);
+ inst->phandle = fdt_get_phandle(fdt, offset);
+ g_assert(inst->phandle);
+ ++vof->of_instance_last;
+
+ inst->path = g_strdup(path);
+ g_hash_table_insert(vof->of_instances,
+ GINT_TO_POINTER(vof->of_instance_last),
+ inst);
+ ret = vof->of_instance_last;
+
+trace_exit:
+ trace_vof_open(path, inst ? inst->phandle : 0, ret);
+
+ return ret;
+}
+
+uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename,
+ const char *prop, const char *path)
+{
+ int node = fdt_path_offset(fdt, nodename);
+ int inst, offset;
+
+ offset = fdt_path_offset(fdt, path);
+ if (offset < 0) {
+ trace_vof_error_unknown_path(path);
+ return offset;
+ }
+
+ inst = vof_do_open(fdt, vof, offset, path);
+
+ return fdt_setprop_cell(fdt, node, prop, inst);
+}
+
+static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr)
+{
+ char path[VOF_MAX_PATH];
+ int offset;
+
+ if (readstr(pathaddr, path, sizeof(path))) {
+ return -1;
+ }
+
+ offset = path_offset(fdt, path);
+ if (offset < 0) {
+ trace_vof_error_unknown_path(path);
+ return offset;
+ }
+
+ return vof_do_open(fdt, vof, offset, path);
+}
+
+static void vof_close(Vof *vof, uint32_t ihandle)
+{
+ if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) {
+ trace_vof_error_unknown_ihandle_close(ihandle);
+ }
+}
+
+static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle)
+{
+ gpointer instp = g_hash_table_lookup(vof->of_instances,
+ GINT_TO_POINTER(ihandle));
+ uint32_t ret = -1;
+
+ if (instp) {
+ ret = ((OfInstance *)instp)->phandle;
+ }
+ trace_vof_instance_to_package(ihandle, ret);
+
+ return ret;
+}
+
+static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle,
+ uint32_t buf, uint32_t len)
+{
+ uint32_t ret = -1;
+ char tmp[VOF_MAX_PATH] = "";
+
+ ret = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+ if (ret > 0) {
+ if (VOF_MEM_WRITE(buf, tmp, ret) != MEMTX_OK) {
+ ret = -1;
+ }
+ }
+
+ trace_vof_package_to_path(phandle, tmp, ret);
+
+ return ret;
+}
+
+static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle,
+ uint32_t buf, uint32_t len)
+{
+ uint32_t ret = -1;
+ uint32_t phandle = vof_instance_to_package(vof, ihandle);
+ char tmp[VOF_MAX_PATH] = "";
+
+ if (phandle != -1) {
+ ret = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+ if (ret > 0) {
+ if (VOF_MEM_WRITE(buf, tmp, ret) != MEMTX_OK) {
+ ret = -1;
+ }
+ }
+ }
+ trace_vof_instance_to_path(ihandle, phandle, tmp, ret);
+
+ return ret;
+}
+
+static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf,
+ uint32_t len)
+{
+ char tmp[VOF_VTY_BUF_SIZE];
+ unsigned cb;
+ OfInstance *inst = (OfInstance *)
+ g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle));
+
+ if (!inst) {
+ trace_vof_error_write(ihandle);
+ return -1;
+ }
+
+ for ( ; len > 0; len -= cb) {
+ cb = MIN(len, sizeof(tmp) - 1);
+ if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) {
+ return -1;
+ }
+
+ /* FIXME: there is no backend(s) yet so just call a trace */
+ if (trace_event_get_state(TRACE_VOF_WRITE) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ tmp[cb] = '\0';
+ trace_vof_write(ihandle, cb, tmp);
+ }
+ }
+
+ return len;
+}
+
+static void vof_claimed_dump(GArray *claimed)
+{
+ int i;
+ OfClaimed c;
+
+ if (trace_event_get_state(TRACE_VOF_CLAIMED) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+
+ for (i = 0; i < claimed->len; ++i) {
+ c = g_array_index(claimed, OfClaimed, i);
+ trace_vof_claimed(c.start, c.start + c.size, c.size);
+ }
+ }
+}
+
+static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size)
+{
+ int i;
+ OfClaimed c;
+
+ for (i = 0; i < claimed->len; ++i) {
+ c = g_array_index(claimed, OfClaimed, i);
+ if (ranges_overlap(c.start, c.size, virt, size)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size)
+{
+ OfClaimed newclaim;
+
+ newclaim.start = virt;
+ newclaim.size = size;
+ g_array_append_val(claimed, newclaim);
+}
+
+static gint of_claimed_compare_func(gconstpointer a, gconstpointer b)
+{
+ return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start;
+}
+
+static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base)
+{
+ int i, n, offset, proplen = 0, sc, ac;
+ target_ulong mem0_end;
+ const uint8_t *mem0_reg;
+ g_autofree uint8_t *avail = NULL;
+ uint8_t *availcur;
+
+ if (!fdt || !claimed) {
+ return;
+ }
+
+ offset = fdt_path_offset(fdt, "/");
+ _FDT(offset);
+ ac = fdt_address_cells(fdt, offset);
+ g_assert(ac == 1 || ac == 2);
+ sc = fdt_size_cells(fdt, offset);
+ g_assert(sc == 1 || sc == 2);
+
+ offset = fdt_path_offset(fdt, "/memory@0");
+ _FDT(offset);
+
+ mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen);
+ g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc));
+ if (sc == 2) {
+ mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac));
+ } else {
+ mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac));
+ }
+
+ g_array_sort(claimed, of_claimed_compare_func);
+ vof_claimed_dump(claimed);
+
+ /*
+ * VOF resides in the first page so we do not need to check if there is
+ * available memory before the first claimed block
+ */
+ g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0));
+
+ avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len);
+ for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) {
+ OfClaimed c = g_array_index(claimed, OfClaimed, i);
+ uint64_t start, size;
+
+ start = c.start + c.size;
+ if (i < claimed->len - 1) {
+ OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1);
+
+ size = cn.start - start;
+ } else {
+ size = mem0_end - start;
+ }
+
+ if (ac == 2) {
+ *(uint64_t *) availcur = cpu_to_be64(start);
+ } else {
+ *(uint32_t *) availcur = cpu_to_be32(start);
+ }
+ availcur += sizeof(uint32_t) * ac;
+ if (sc == 2) {
+ *(uint64_t *) availcur = cpu_to_be64(size);
+ } else {
+ *(uint32_t *) availcur = cpu_to_be32(size);
+ }
+ availcur += sizeof(uint32_t) * sc;
+
+ if (size) {
+ trace_vof_avail(c.start + c.size, c.start + c.size + size, size);
+ ++n;
+ }
+ }
+ _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail)));
+}
+
+/*
+ * OF1275:
+ * "Allocates size bytes of memory. If align is zero, the allocated range
+ * begins at the virtual address virt. Otherwise, an aligned address is
+ * automatically chosen and the input argument virt is ignored".
+ *
+ * In other words, exactly one of @virt and @align is non-zero.
+ */
+uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size,
+ uint64_t align)
+{
+ uint64_t ret;
+
+ if (size == 0) {
+ ret = -1;
+ } else if (align == 0) {
+ if (!vof_claim_avail(vof->claimed, virt, size)) {
+ ret = -1;
+ } else {
+ ret = virt;
+ }
+ } else {
+ vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align);
+ while (1) {
+ if (vof->claimed_base >= vof->top_addr) {
+ error_report("Out of RMA memory for the OF client");
+ return -1;
+ }
+ if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) {
+ break;
+ }
+ vof->claimed_base += size;
+ }
+ ret = vof->claimed_base;
+ }
+
+ if (ret != -1) {
+ vof->claimed_base = MAX(vof->claimed_base, ret + size);
+ vof_claim_add(vof->claimed, ret, size);
+ }
+ trace_vof_claim(virt, size, align, ret);
+
+ return ret;
+}
+
+static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size)
+{
+ uint32_t ret = -1;
+ int i;
+ GArray *claimed = vof->claimed;
+ OfClaimed c;
+
+ for (i = 0; i < claimed->len; ++i) {
+ c = g_array_index(claimed, OfClaimed, i);
+ if (c.start == virt && c.size == size) {
+ g_array_remove_index(claimed, i);
+ ret = 0;
+ break;
+ }
+ }
+
+ trace_vof_release(virt, size, ret);
+
+ return ret;
+}
+
+static void vof_instantiate_rtas(Error **errp)
+{
+ error_setg(errp, "The firmware should have instantiated RTAS");
+}
+
+static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr,
+ uint32_t ihandle, uint32_t param1,
+ uint32_t param2, uint32_t param3,
+ uint32_t param4, uint32_t *ret2)
+{
+ uint32_t ret = -1;
+ char method[VOF_MAX_METHODLEN] = "";
+ OfInstance *inst;
+
+ if (!ihandle) {
+ goto trace_exit;
+ }
+
+ inst = (OfInstance *)g_hash_table_lookup(vof->of_instances,
+ GINT_TO_POINTER(ihandle));
+ if (!inst) {
+ goto trace_exit;
+ }
+
+ if (readstr(methodaddr, method, sizeof(method))) {
+ goto trace_exit;
+ }
+
+ if (strcmp(inst->path, "/") == 0) {
+ if (strcmp(method, "ibm,client-architecture-support") == 0) {
+ Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+
+ if (vmo) {
+ VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo);
+
+ g_assert(vmc->client_architecture_support);
+ ret = vmc->client_architecture_support(ms, first_cpu, param1);
+ }
+
+ *ret2 = 0;
+ }
+ } else if (strcmp(inst->path, "/rtas") == 0) {
+ if (strcmp(method, "instantiate-rtas") == 0) {
+ vof_instantiate_rtas(&error_fatal);
+ ret = 0;
+ *ret2 = param1; /* rtas-base */
+ }
+ } else {
+ trace_vof_error_unknown_method(method);
+ }
+
+trace_exit:
+ trace_vof_method(ihandle, method, param1, ret, *ret2);
+
+ return ret;
+}
+
+static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1,
+ uint32_t param2, uint32_t *ret2)
+{
+ uint32_t ret = -1;
+ char cmd[VOF_MAX_FORTHCODE] = "";
+
+ /* No interpret implemented so just call a trace */
+ readstr(cmdaddr, cmd, sizeof(cmd));
+ trace_vof_interpret(cmd, param1, param2, ret, *ret2);
+
+ return ret;
+}
+
+static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof)
+{
+ Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+ /* After "quiesce", no change is expected to the FDT, pack FDT to ensure */
+ int rc = fdt_pack(fdt);
+
+ assert(rc == 0);
+
+ if (vmo) {
+ VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo);
+
+ if (vmc->quiesce) {
+ vmc->quiesce(ms);
+ }
+ }
+
+ vof_claimed_dump(vof->claimed);
+}
+
+static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof,
+ const char *service,
+ uint32_t *args, unsigned nargs,
+ uint32_t *rets, unsigned nrets)
+{
+ uint32_t ret = 0;
+
+ /* @nrets includes the value which this function returns */
+#define cmpserv(s, a, r) \
+ cmpservice(service, nargs, nrets, (s), (a), (r))
+
+ if (cmpserv("finddevice", 1, 1)) {
+ ret = vof_finddevice(fdt, args[0]);
+ } else if (cmpserv("getprop", 4, 1)) {
+ ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]);
+ } else if (cmpserv("getproplen", 2, 1)) {
+ ret = vof_getproplen(fdt, args[0], args[1]);
+ } else if (cmpserv("setprop", 4, 1)) {
+ ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]);
+ } else if (cmpserv("nextprop", 3, 1)) {
+ ret = vof_nextprop(fdt, args[0], args[1], args[2]);
+ } else if (cmpserv("peer", 1, 1)) {
+ ret = vof_peer(fdt, args[0]);
+ } else if (cmpserv("child", 1, 1)) {
+ ret = vof_child(fdt, args[0]);
+ } else if (cmpserv("parent", 1, 1)) {
+ ret = vof_parent(fdt, args[0]);
+ } else if (cmpserv("open", 1, 1)) {
+ ret = vof_open(fdt, vof, args[0]);
+ } else if (cmpserv("close", 1, 0)) {
+ vof_close(vof, args[0]);
+ } else if (cmpserv("instance-to-package", 1, 1)) {
+ ret = vof_instance_to_package(vof, args[0]);
+ } else if (cmpserv("package-to-path", 3, 1)) {
+ ret = vof_package_to_path(fdt, args[0], args[1], args[2]);
+ } else if (cmpserv("instance-to-path", 3, 1)) {
+ ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]);
+ } else if (cmpserv("write", 3, 1)) {
+ ret = vof_write(vof, args[0], args[1], args[2]);
+ } else if (cmpserv("claim", 3, 1)) {
+ ret = vof_claim(vof, args[0], args[1], args[2]);
+ if (ret != -1) {
+ vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+ }
+ } else if (cmpserv("release", 2, 0)) {
+ ret = vof_release(vof, args[0], args[1]);
+ if (ret != -1) {
+ vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+ }
+ } else if (cmpserv("call-method", 0, 0)) {
+ ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3],
+ args[4], args[5], rets);
+ } else if (cmpserv("interpret", 0, 0)) {
+ ret = vof_call_interpret(args[0], args[1], args[2], rets);
+ } else if (cmpserv("milliseconds", 0, 1)) {
+ ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ } else if (cmpserv("quiesce", 0, 0)) {
+ vof_quiesce(ms, fdt, vof);
+ } else if (cmpserv("exit", 0, 0)) {
+ error_report("Stopped as the VM requested \"exit\"");
+ vm_stop(RUN_STATE_PAUSED);
+ } else {
+ trace_vof_error_unknown_service(service, nargs, nrets);
+ ret = -1;
+ }
+
+#undef cmpserv
+
+ return ret;
+}
+
+/* Defined as Big Endian */
+struct prom_args {
+ uint32_t service;
+ uint32_t nargs;
+ uint32_t nret;
+ uint32_t args[10];
+} QEMU_PACKED;
+
+int vof_client_call(MachineState *ms, Vof *vof, void *fdt,
+ target_ulong args_real)
+{
+ struct prom_args args_be;
+ uint32_t args[ARRAY_SIZE(args_be.args)];
+ uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret;
+ char service[64];
+ unsigned nargs, nret, i;
+
+ if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) {
+ return -EINVAL;
+ }
+ nargs = be32_to_cpu(args_be.nargs);
+ if (nargs >= ARRAY_SIZE(args_be.args)) {
+ return -EINVAL;
+ }
+
+ if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) !=
+ MEMTX_OK) {
+ return -EINVAL;
+ }
+ if (strnlen(service, sizeof(service)) == sizeof(service)) {
+ /* Too long service name */
+ return -EINVAL;
+ }
+
+ for (i = 0; i < nargs; ++i) {
+ args[i] = be32_to_cpu(args_be.args[i]);
+ }
+
+ nret = be32_to_cpu(args_be.nret);
+ ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret);
+ if (!nret) {
+ return 0;
+ }
+
+ args_be.args[nargs] = cpu_to_be32(ret);
+ for (i = 1; i < nret; ++i) {
+ args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]);
+ }
+
+ if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]),
+ args_be.args + nargs, sizeof(args_be.args[0]) * nret) !=
+ MEMTX_OK) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vof_instance_free(gpointer data)
+{
+ OfInstance *inst = (OfInstance *)data;
+
+ g_free(inst->path);
+ g_free(inst);
+}
+
+void vof_init(Vof *vof, uint64_t top_addr, Error **errp)
+{
+ vof_cleanup(vof);
+
+ vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, vof_instance_free);
+ vof->claimed = g_array_new(false, false, sizeof(OfClaimed));
+
+ /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */
+ vof->top_addr = MIN(top_addr, 4 * GiB);
+ if (vof_claim(vof, 0, vof->fw_size, 0) == -1) {
+ error_setg(errp, "Memory for firmware is in use");
+ }
+}
+
+void vof_cleanup(Vof *vof)
+{
+ if (vof->claimed) {
+ g_array_unref(vof->claimed);
+ }
+ if (vof->of_instances) {
+ g_hash_table_unref(vof->of_instances);
+ }
+ vof->claimed = NULL;
+ vof->of_instances = NULL;
+}
+
+void vof_build_dt(void *fdt, Vof *vof)
+{
+ uint32_t phandle = fdt_get_max_phandle(fdt);
+ int offset, proplen = 0;
+ const void *prop;
+
+ /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */
+ for (offset = fdt_next_node(fdt, -1, NULL);
+ offset >= 0;
+ offset = fdt_next_node(fdt, offset, NULL)) {
+ prop = fdt_getprop(fdt, offset, "phandle", &proplen);
+ if (prop) {
+ continue;
+ }
+ ++phandle;
+ _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle));
+ }
+
+ vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+}
+
+static const TypeInfo vof_machine_if_info = {
+ .name = TYPE_VOF_MACHINE_IF,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(VofMachineIfClass),
+};
+
+static void vof_machine_if_register_types(void)
+{
+ type_register_static(&vof_machine_if_info);
+}
+type_init(vof_machine_if_register_types)
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index d68888f..6a2df1c 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -31,6 +31,7 @@
#include "trace.h"
#include "hw/s390x/css-bridge.h"
#include "hw/s390x/s390-virtio-ccw.h"
+#include "sysemu/replay.h"
#define NR_CLASSIC_INDICATOR_BITS 64
@@ -770,6 +771,11 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD;
}
+ /* fd-based ioevents can't be synchronized in record/replay */
+ if (replay_mode != REPLAY_MODE_NONE) {
+ dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD;
+ }
+
if (k->realize) {
k->realize(dev, &err);
if (err) {
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 28e0032..18eb824 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -152,6 +152,10 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
goto fail_guest_notifiers;
}
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0);
@@ -198,6 +202,10 @@ fail_host_notifiers:
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
for (i = 0; i < vq_init_count; i++) {
@@ -238,12 +246,20 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
blk_drain_all(); /* ensure there are no in-flight requests */
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
for (i = 0; i < vs->conf.num_queues + 2; i++) {
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
for (i = 0; i < vs->conf.num_queues + 2; i++) {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ae5654f..3f0d111 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -36,6 +36,7 @@
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
@@ -134,6 +135,29 @@ static const char *index_to_str(VFIODevice *vbasedev, int index)
}
}
+static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
+{
+ switch (container->iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ /*
+ * We support coordinated discarding of RAM via the RamDiscardManager.
+ */
+ return ram_block_uncoordinated_discard_disable(state);
+ default:
+ /*
+ * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
+ * RamDiscardManager, however, it is completely untested.
+ *
+ * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
+ * completely the opposite of managing mapping/pinning dynamically as
+ * required by RamDiscardManager. We would have to special-case sections
+ * with a RamDiscardManager.
+ */
+ return ram_block_discard_disable(state);
+ }
+}
+
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
int action, int fd, Error **errp)
{
@@ -569,6 +593,44 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_report("iommu map to non memory area %"HWADDR_PRIx"",
xlat);
return false;
+ } else if (memory_region_has_ram_discard_manager(mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
+ MemoryRegionSection tmp = {
+ .mr = mr,
+ .offset_within_region = xlat,
+ .size = int128_make64(len),
+ };
+
+ /*
+ * Malicious VMs can map memory into the IOMMU, which is expected
+ * to remain discarded. vfio will pin all pages, populating memory.
+ * Disallow that. vmstate priorities make sure any RamDiscardManager
+ * were already restored before IOMMUs are restored.
+ */
+ if (!ram_discard_manager_is_populated(rdm, &tmp)) {
+ error_report("iommu map to discarded memory (e.g., unplugged via"
+ " virtio-mem): %"HWADDR_PRIx"",
+ iotlb->translated_addr);
+ return false;
+ }
+
+ /*
+ * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
+ * pages will remain pinned inside vfio until unmapped, resulting in a
+ * higher memory consumption than expected. If memory would get
+ * populated again later, there would be an inconsistency between pages
+ * pinned by vfio and pages seen by QEMU. This is the case until
+ * unmapped from the IOMMU (e.g., during device reset).
+ *
+ * With malicious guests, we really only care about pinning more memory
+ * than expected. RLIMIT_MEMLOCK set for the user/process can never be
+ * exceeded and can be used to mitigate this problem.
+ */
+ warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
+ " RAM (e.g., virtio-mem) works, however, malicious"
+ " guests can trigger pinning of more memory than"
+ " intended via an IOMMU. It's possible to mitigate "
+ " by setting/adjusting RLIMIT_MEMLOCK.");
}
/*
@@ -649,6 +711,153 @@ out:
rcu_read_unlock();
}
+static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+ listener);
+ const hwaddr size = int128_get64(section->size);
+ const hwaddr iova = section->offset_within_address_space;
+ int ret;
+
+ /* Unmap with a single call. */
+ ret = vfio_dma_unmap(vrdl->container, iova, size , NULL);
+ if (ret) {
+ error_report("%s: vfio_dma_unmap() failed: %s", __func__,
+ strerror(-ret));
+ }
+}
+
+static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+ listener);
+ const hwaddr end = section->offset_within_region +
+ int128_get64(section->size);
+ hwaddr start, next, iova;
+ void *vaddr;
+ int ret;
+
+ /*
+ * Map in (aligned within memory region) minimum granularity, so we can
+ * unmap in minimum granularity later.
+ */
+ for (start = section->offset_within_region; start < end; start = next) {
+ next = ROUND_UP(start + 1, vrdl->granularity);
+ next = MIN(next, end);
+
+ iova = start - section->offset_within_region +
+ section->offset_within_address_space;
+ vaddr = memory_region_get_ram_ptr(section->mr) + start;
+
+ ret = vfio_dma_map(vrdl->container, iova, next - start,
+ vaddr, section->readonly);
+ if (ret) {
+ /* Rollback */
+ vfio_ram_discard_notify_discard(rdl, section);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void vfio_register_ram_discard_listener(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+ VFIORamDiscardListener *vrdl;
+
+ /* Ignore some corner cases not relevant in practice. */
+ g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
+ g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
+ TARGET_PAGE_SIZE));
+ g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
+
+ vrdl = g_new0(VFIORamDiscardListener, 1);
+ vrdl->container = container;
+ vrdl->mr = section->mr;
+ vrdl->offset_within_address_space = section->offset_within_address_space;
+ vrdl->size = int128_get64(section->size);
+ vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
+ section->mr);
+
+ g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
+ g_assert(vrdl->granularity >= 1 << ctz64(container->pgsizes));
+
+ ram_discard_listener_init(&vrdl->listener,
+ vfio_ram_discard_notify_populate,
+ vfio_ram_discard_notify_discard, true);
+ ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
+ QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);
+
+ /*
+ * Sanity-check if we have a theoretically problematic setup where we could
+ * exceed the maximum number of possible DMA mappings over time. We assume
+ * that each mapped section in the same address space as a RamDiscardManager
+ * section consumes exactly one DMA mapping, with the exception of
+ * RamDiscardManager sections; i.e., we don't expect to have gIOMMU sections
+ * in the same address space as RamDiscardManager sections.
+ *
+ * We assume that each section in the address space consumes one memslot.
+ * We take the number of KVM memory slots as a best guess for the maximum
+ * number of sections in the address space we could have over time,
+ * also consuming DMA mappings.
+ */
+ if (container->dma_max_mappings) {
+ unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;
+
+#ifdef CONFIG_KVM
+ if (kvm_enabled()) {
+ max_memslots = kvm_get_max_memslots();
+ }
+#endif
+
+ QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+ hwaddr start, end;
+
+ start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
+ vrdl->granularity);
+ end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size,
+ vrdl->granularity);
+ vrdl_mappings += (end - start) / vrdl->granularity;
+ vrdl_count++;
+ }
+
+ if (vrdl_mappings + max_memslots - vrdl_count >
+ container->dma_max_mappings) {
+ warn_report("%s: possibly running out of DMA mappings. E.g., try"
+ " increasing the 'block-size' of virtio-mem devies."
+ " Maximum possible DMA mappings: %d, Maximum possible"
+ " memslots: %d", __func__, container->dma_max_mappings,
+ max_memslots);
+ }
+ }
+}
+
+static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to unregister missing RAM discard listener");
+ }
+
+ ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
+ QLIST_REMOVE(vrdl, next);
+ g_free(vrdl);
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -810,6 +1019,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
/* Here we assume that memory_region_is_ram(section->mr)==true */
+ /*
+ * For RAM memory regions with a RamDiscardManager, we only want to map the
+ * actually populated parts - and update the mapping whenever we're notified
+ * about changes.
+ */
+ if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_register_ram_discard_listener(container, section);
+ return;
+ }
+
vaddr = memory_region_get_ram_ptr(section->mr) +
section->offset_within_region +
(iova - section->offset_within_address_space);
@@ -947,6 +1166,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ vfio_unregister_ram_discard_listener(container, section);
+ /* Unregistering will trigger an unmap. */
+ try_unmap = false;
}
if (try_unmap) {
@@ -1108,6 +1331,49 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
rcu_read_unlock();
}
+static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
+ void *opaque)
+{
+ const hwaddr size = int128_get64(section->size);
+ const hwaddr iova = section->offset_within_address_space;
+ const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
+ VFIORamDiscardListener *vrdl = opaque;
+
+ /*
+ * Sync the whole mapped region (spanning multiple individual mappings)
+ * in one go.
+ */
+ return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr);
+}
+
+static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ }
+
+ /*
+ * We only want/can synchronize the bitmap for actually mapped parts -
+ * which correspond to populated parts. Replay all populated parts.
+ */
+ return ram_discard_manager_replay_populated(rdm, section,
+ vfio_ram_discard_get_dirty_bitmap,
+ &vrdl);
+}
+
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
MemoryRegionSection *section)
{
@@ -1139,6 +1405,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
}
}
return 0;
+ } else if (memory_region_has_ram_discard_manager(section->mr)) {
+ return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
}
ram_addr = memory_region_get_ram_addr(section->mr) +
@@ -1732,15 +2000,25 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
* new memory, it will not yet set ram_block_discard_set_required() and
* therefore, neither stops us here or deals with the sudden memory
* consumption of inflated memory.
+ *
+ * We do support discarding of memory coordinated via the RamDiscardManager
+ * with some IOMMU types. vfio_ram_block_discard_disable() handles the
+ * details once we know which type of IOMMU we are using.
*/
- ret = ram_block_discard_disable(true);
- if (ret) {
- error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- return ret;
- }
QLIST_FOREACH(container, &space->containers, next) {
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Cannot set discarding of RAM broken");
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER,
+ &container->fd)) {
+ error_report("vfio: error disconnecting group %d from"
+ " container", group->groupid);
+ }
+ return ret;
+ }
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
vfio_kvm_device_add_group(group);
@@ -1768,14 +2046,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
container->fd = fd;
container->error = NULL;
container->dirty_pages_supported = false;
+ container->dma_max_mappings = 0;
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
+ QLIST_INIT(&container->vrdl_list);
ret = vfio_init_container(container, group->fd, errp);
if (ret) {
goto free_container_exit;
}
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ goto free_container_exit;
+ }
+
switch (container->iommu_type) {
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
@@ -1798,7 +2084,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
container->pgsizes = info->iova_pgsizes;
+ /* The default in the kernel ("dma_entry_limit") is 65535. */
+ container->dma_max_mappings = 65535;
if (!ret) {
+ vfio_get_info_dma_avail(info, &container->dma_max_mappings);
vfio_get_iommu_info_migration(container, info);
}
g_free(info);
@@ -1820,7 +2109,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (ret) {
error_setg_errno(errp, errno, "failed to enable container");
ret = -errno;
- goto free_container_exit;
+ goto enable_discards_exit;
}
} else {
container->prereg_listener = vfio_prereg_listener;
@@ -1832,7 +2121,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
ret = -1;
error_propagate_prepend(errp, container->error,
"RAM memory listener initialization failed: ");
- goto free_container_exit;
+ goto enable_discards_exit;
}
}
@@ -1845,7 +2134,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (v2) {
memory_listener_unregister(&container->prereg_listener);
}
- goto free_container_exit;
+ goto enable_discards_exit;
}
if (v2) {
@@ -1860,7 +2149,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
if (ret) {
error_setg_errno(errp, -ret,
"failed to remove existing window");
- goto free_container_exit;
+ goto enable_discards_exit;
}
} else {
/* The default table uses 4K pages */
@@ -1901,6 +2190,9 @@ listener_release_exit:
vfio_kvm_device_del_group(group);
vfio_listener_release(container);
+enable_discards_exit:
+ vfio_ram_block_discard_disable(container, false);
+
free_container_exit:
g_free(container);
@@ -1908,7 +2200,6 @@ close_fd_exit:
close(fd);
put_space_exit:
- ram_block_discard_disable(false);
vfio_put_address_space(space);
return ret;
@@ -2030,7 +2321,7 @@ void vfio_put_group(VFIOGroup *group)
}
if (!group->ram_block_discard_allowed) {
- ram_block_discard_disable(false);
+ vfio_ram_block_discard_disable(group->container, false);
}
vfio_kvm_device_del_group(group);
vfio_disconnect_container(group);
@@ -2084,7 +2375,7 @@ int vfio_get_device(VFIOGroup *group, const char *name,
if (!group->ram_block_discard_allowed) {
group->ram_block_discard_allowed = true;
- ram_block_discard_disable(false);
+ vfio_ram_block_discard_disable(group->container, false);
}
}
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 75aa7d6..df91e45 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -145,7 +145,173 @@ static bool virtio_mem_is_busy(void)
return migration_in_incoming_postcopy() || !migration_is_idle();
}
-static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
+typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size);
+
+static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+ virtio_mem_range_cb cb)
+{
+ unsigned long first_zero_bit, last_zero_bit;
+ uint64_t offset, size;
+ int ret = 0;
+
+ first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
+ while (first_zero_bit < vmem->bitmap_size) {
+ offset = first_zero_bit * vmem->block_size;
+ last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+ first_zero_bit + 1) - 1;
+ size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+
+ ret = cb(vmem, arg, offset, size);
+ if (ret) {
+ break;
+ }
+ first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+ last_zero_bit + 2);
+ }
+ return ret;
+}
+
+/*
+ * Adjust the memory section to cover the intersection with the given range.
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
+ uint64_t offset, uint64_t size)
+{
+ uint64_t start = MAX(s->offset_within_region, offset);
+ uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
+ offset + size);
+
+ if (end <= start) {
+ return false;
+ }
+
+ s->offset_within_address_space += start - s->offset_within_region;
+ s->offset_within_region = start;
+ s->size = int128_make64(end - start);
+ return true;
+}
+
+typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
+
+static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
+ MemoryRegionSection *s,
+ void *arg,
+ virtio_mem_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ int ret = 0;
+
+ first_bit = s->offset_within_region / vmem->bitmap_size;
+ first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
+ while (first_bit < vmem->bitmap_size) {
+ MemoryRegionSection tmp = *s;
+
+ offset = first_bit * vmem->block_size;
+ last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * vmem->block_size;
+
+ if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+ break;
+ }
+ ret = cb(&tmp, arg);
+ if (ret) {
+ break;
+ }
+ first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+ last_bit + 2);
+ }
+ return ret;
+}
+
+static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ return rdl->notify_populate(rdl, s);
+}
+
+static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ rdl->notify_discard(rdl, s);
+ return 0;
+}
+
+static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
+ uint64_t size)
+{
+ RamDiscardListener *rdl;
+
+ QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+ continue;
+ }
+ rdl->notify_discard(rdl, &tmp);
+ }
+}
+
+static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
+ uint64_t size)
+{
+ RamDiscardListener *rdl, *rdl2;
+ int ret = 0;
+
+ QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+ continue;
+ }
+ ret = rdl->notify_populate(rdl, &tmp);
+ if (ret) {
+ break;
+ }
+ }
+
+ if (ret) {
+ /* Notify all already-notified listeners. */
+ QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (rdl2 == rdl) {
+ break;
+ }
+ if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+ continue;
+ }
+ rdl2->notify_discard(rdl2, &tmp);
+ }
+ }
+ return ret;
+}
+
+static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
+{
+ RamDiscardListener *rdl;
+
+ if (!vmem->size) {
+ return;
+ }
+
+ QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+ if (rdl->double_discard_supported) {
+ rdl->notify_discard(rdl, rdl->section);
+ } else {
+ virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_discard_cb);
+ }
+ }
+}
+
+static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
uint64_t size, bool plugged)
{
const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
@@ -198,7 +364,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
virtio_mem_send_response(vmem, elem, &resp);
}
-static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size)
+static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
+ uint64_t size)
{
if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
return false;
@@ -219,19 +386,21 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
uint64_t size, bool plug)
{
const uint64_t offset = start_gpa - vmem->addr;
- int ret;
+ RAMBlock *rb = vmem->memdev->mr.ram_block;
if (virtio_mem_is_busy()) {
return -EBUSY;
}
if (!plug) {
- ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
- if (ret) {
- error_report("Unexpected error discarding RAM: %s",
- strerror(-ret));
+ if (ram_block_discard_range(rb, offset, size)) {
return -EBUSY;
}
+ virtio_mem_notify_unplug(vmem, offset, size);
+ } else if (virtio_mem_notify_plug(vmem, offset, size)) {
+ /* Could be a mapping attempt resulted in memory getting populated. */
+ ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
+ return -EBUSY;
}
virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
return 0;
@@ -318,17 +487,16 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
RAMBlock *rb = vmem->memdev->mr.ram_block;
- int ret;
if (virtio_mem_is_busy()) {
return -EBUSY;
}
- ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
- if (ret) {
- error_report("Unexpected error discarding RAM: %s", strerror(-ret));
+ if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
return -EBUSY;
}
+ virtio_mem_notify_unplug_all(vmem);
+
bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
if (vmem->size) {
vmem->size = 0;
@@ -551,7 +719,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
return;
}
- if (ram_block_discard_require(true)) {
+ if (ram_block_coordinated_discard_require(true)) {
error_setg(errp, "Discarding RAM is disabled");
return;
}
@@ -559,7 +727,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
return;
}
@@ -577,6 +745,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
qemu_register_reset(virtio_mem_system_reset, vmem);
precopy_add_notifier(&vmem->precopy_notifier);
+
+ /*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -584,6 +759,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
+ /*
+ * The unplug handler unmapped the memory region, it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
precopy_remove_notifier(&vmem->precopy_notifier);
qemu_unregister_reset(virtio_mem_system_reset, vmem);
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
@@ -591,43 +771,47 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
}
-static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
+static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size)
{
RAMBlock *rb = vmem->memdev->mr.ram_block;
- unsigned long first_zero_bit, last_zero_bit;
- uint64_t offset, length;
- int ret;
- /* Find consecutive unplugged blocks and discard the consecutive range. */
- first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
- while (first_zero_bit < vmem->bitmap_size) {
- offset = first_zero_bit * vmem->block_size;
- last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
- first_zero_bit + 1) - 1;
- length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+ return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
+}
- ret = ram_block_discard_range(rb, offset, length);
- if (ret) {
- error_report("Unexpected error discarding RAM: %s",
- strerror(-ret));
- return -EINVAL;
- }
- first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
- last_zero_bit + 2);
- }
- return 0;
+static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
+{
+ /* Make sure all memory is really discarded after migration. */
+ return virtio_mem_for_each_unplugged_range(vmem, NULL,
+ virtio_mem_discard_range_cb);
}
static int virtio_mem_post_load(void *opaque, int version_id)
{
+ VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+ RamDiscardListener *rdl;
+ int ret;
+
+ /*
+ * We started out with all memory discarded and our memory region is mapped
+ * into an address space. Replay, now that we updated the bitmap.
+ */
+ QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+ ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_populate_cb);
+ if (ret) {
+ return ret;
+ }
+ }
+
if (migration_in_incoming_postcopy()) {
return 0;
}
- return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque));
+ return virtio_mem_restore_unplugged(vmem);
}
typedef struct VirtIOMEMMigSanityChecks {
@@ -702,6 +886,7 @@ static const VMStateDescription vmstate_virtio_mem_device = {
.name = "virtio-mem-device",
.minimum_version_id = 1,
.version_id = 1,
+ .priority = MIG_PRI_VIRTIO_MEM,
.post_load = virtio_mem_post_load,
.fields = (VMStateField[]) {
VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
@@ -872,28 +1057,19 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
vmem->block_size = value;
}
-static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
+static int virtio_mem_precopy_exclude_range_cb(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size)
{
void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block);
- unsigned long first_zero_bit, last_zero_bit;
- uint64_t offset, length;
- /*
- * Find consecutive unplugged blocks and exclude them from migration.
- *
- * Note: Blocks cannot get (un)plugged during precopy, no locking needed.
- */
- first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
- while (first_zero_bit < vmem->bitmap_size) {
- offset = first_zero_bit * vmem->block_size;
- last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
- first_zero_bit + 1) - 1;
- length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+ qemu_guest_free_page_hint(host + offset, size);
+ return 0;
+}
- qemu_guest_free_page_hint(host + offset, length);
- first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
- last_zero_bit + 2);
- }
+static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
+{
+ virtio_mem_for_each_unplugged_range(vmem, NULL,
+ virtio_mem_precopy_exclude_range_cb);
}
static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data)
@@ -918,6 +1094,7 @@ static void virtio_mem_instance_init(Object *obj)
notifier_list_init(&vmem->size_change_notifiers);
vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
+ QLIST_INIT(&vmem->rdl_list);
object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
NULL, NULL, NULL);
@@ -937,11 +1114,107 @@ static Property virtio_mem_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+ g_assert(mr == &vmem->memdev->mr);
+ return vmem->block_size;
+}
+
+static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *s)
+{
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ uint64_t start_gpa = vmem->addr + s->offset_within_region;
+ uint64_t end_gpa = start_gpa + int128_get64(s->size);
+
+ g_assert(s->mr == &vmem->memdev->mr);
+
+ start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
+ end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
+
+ if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
+ return false;
+ }
+
+ return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
+}
+
+struct VirtIOMEMReplayData {
+ void *fn;
+ void *opaque;
+};
+
+static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
+{
+ struct VirtIOMEMReplayData *data = arg;
+
+ return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+}
+
+static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamPopulate replay_fn,
+ void *opaque)
+{
+ const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ struct VirtIOMEMReplayData data = {
+ .fn = replay_fn,
+ .opaque = opaque,
+ };
+
+ g_assert(s->mr == &vmem->memdev->mr);
+ return virtio_mem_for_each_plugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_populated_cb);
+}
+
+static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *s)
+{
+ VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+ int ret;
+
+ g_assert(s->mr == &vmem->memdev->mr);
+ rdl->section = memory_region_section_new_copy(s);
+
+ QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
+ ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_populate_cb);
+ if (ret) {
+ error_report("%s: Replaying plugged ranges failed: %s", __func__,
+ strerror(-ret));
+ }
+}
+
+static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{
+ VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+ g_assert(rdl->section->mr == &vmem->memdev->mr);
+ if (vmem->size) {
+ if (rdl->double_discard_supported) {
+ rdl->notify_discard(rdl, rdl->section);
+ } else {
+ virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+ virtio_mem_notify_discard_cb);
+ }
+ }
+
+ memory_region_section_free_copy(rdl->section);
+ rdl->section = NULL;
+ QLIST_REMOVE(rdl, next);
+}
+
static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
device_class_set_props(dc, virtio_mem_properties);
dc->vmsd = &vmstate_virtio_mem;
@@ -957,6 +1230,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
vmc->get_memory_region = virtio_mem_get_memory_region;
vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
+
+ rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
+ rdmc->is_populated = virtio_mem_rdm_is_populated;
+ rdmc->replay_populated = virtio_mem_rdm_replay_populated;
+ rdmc->register_listener = virtio_mem_rdm_register_listener;
+ rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}
static const TypeInfo virtio_mem_info = {
@@ -966,6 +1245,10 @@ static const TypeInfo virtio_mem_info = {
.instance_init = virtio_mem_instance_init,
.class_init = virtio_mem_class_init,
.class_size = sizeof(VirtIOMEMClass),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_RAM_DISCARD_MANAGER },
+ { }
+ },
};
static void virtio_register_types(void)
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 5952471..1af48a1 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -29,6 +29,7 @@
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "sysemu/kvm.h"
+#include "sysemu/replay.h"
#include "hw/virtio/virtio-mmio.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
@@ -740,6 +741,11 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD;
}
+ /* fd-based ioevents can't be synchronized in record/replay */
+ if (replay_mode != REPLAY_MODE_NONE) {
+ proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD;
+ }
+
if (proxy->legacy) {
memory_region_init_io(&proxy->iomem, OBJECT(d),
&virtio_legacy_mem_ops, proxy,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index b321604..433060a 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -37,6 +37,7 @@
#include "qemu/range.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/visitor.h"
+#include "sysemu/replay.h"
#define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev))
@@ -423,6 +424,11 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
uint64_t val = 0;
+
+ if (vdev == NULL) {
+ return UINT64_MAX;
+ }
+
if (addr < config) {
return virtio_ioport_read(proxy, addr);
}
@@ -454,6 +460,11 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr,
VirtIOPCIProxy *proxy = opaque;
uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+ if (vdev == NULL) {
+ return;
+ }
+
if (addr < config) {
virtio_ioport_write(proxy, addr, val);
return;
@@ -1146,6 +1157,10 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
uint32_t val = 0;
int i;
+ if (vdev == NULL) {
+ return UINT64_MAX;
+ }
+
switch (addr) {
case VIRTIO_PCI_COMMON_DFSELECT:
val = proxy->dfselect;
@@ -1229,6 +1244,10 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
VirtIOPCIProxy *proxy = opaque;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ if (vdev == NULL) {
+ return;
+ }
+
switch (addr) {
case VIRTIO_PCI_COMMON_DFSELECT:
proxy->dfselect = val;
@@ -1330,6 +1349,11 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
unsigned size)
{
+ VirtIOPCIProxy *proxy = opaque;
+ if (virtio_bus_get_device(&proxy->bus) == NULL) {
+ return UINT64_MAX;
+ }
+
return 0;
}
@@ -1367,7 +1391,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
uint64_t val;
if (vdev == NULL) {
- return 0;
+ return UINT64_MAX;
}
val = qatomic_xchg(&vdev->isr, 0);
@@ -1388,7 +1412,7 @@ static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
uint64_t val;
if (vdev == NULL) {
- return 0;
+ return UINT64_MAX;
}
switch (size) {
@@ -1760,6 +1784,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
}
+ /* fd-based ioevents can't be synchronized in record/replay */
+ if (replay_mode != REPLAY_MODE_NONE) {
+ proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+ }
+
/*
* virtio pci bar layout used by default.
* subclasses can re-arrange things if needed.
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index ab516ac..6dcf3ba 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3728,6 +3728,10 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
int i, n, r, err;
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
@@ -3766,6 +3770,10 @@ assign_error:
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
while (--i >= 0) {
@@ -3790,6 +3798,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
int n, r;
+ /*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
@@ -3801,6 +3813,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ /*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
memory_region_transaction_commit();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
diff --git a/include/block/aio.h b/include/block/aio.h
index 10fcae1..807edce 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -292,19 +292,44 @@ void aio_context_acquire(AioContext *ctx);
void aio_context_release(AioContext *ctx);
/**
+ * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
+ * run only once and as soon as possible.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ */
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+ const char *name);
+
+/**
* aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
* only once and as soon as possible.
+ *
+ * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the
+ * name string.
*/
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_schedule_oneshot(ctx, cb, opaque) \
+ aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb)))
/**
- * aio_bh_new: Allocate a new bottom half structure.
+ * aio_bh_new_full: Allocate a new bottom half structure.
*
* Bottom halves are lightweight callbacks whose invocation is guaranteed
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
* is opaque and must be allocated prior to its use.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ */
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+ const char *name);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
*/
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_new(ctx, cb, opaque) \
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
/**
* aio_notify: Force processing of pending events.
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b116f7c..c3d417d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -42,6 +42,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass;
DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass,
IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION)
+#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
+typedef struct RamDiscardManagerClass RamDiscardManagerClass;
+typedef struct RamDiscardManager RamDiscardManager;
+DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
+ RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
+
#ifdef CONFIG_FUZZ
void fuzz_dma_read_cb(size_t addr,
size_t len,
@@ -65,6 +71,28 @@ struct ReservedRegion {
unsigned type;
};
+/**
+ * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
+ *
+ * @mr: the region, or %NULL if empty
+ * @fv: the flat view of the address space the region is mapped in
+ * @offset_within_region: the beginning of the section, relative to @mr's start
+ * @size: the size of the section; will not exceed @mr's boundaries
+ * @offset_within_address_space: the address of the first byte of the section
+ * relative to the region's address space
+ * @readonly: writes to this section are ignored
+ * @nonvolatile: this section is non-volatile
+ */
+struct MemoryRegionSection {
+ Int128 size;
+ MemoryRegion *mr;
+ FlatView *fv;
+ hwaddr offset_within_region;
+ hwaddr offset_within_address_space;
+ bool readonly;
+ bool nonvolatile;
+};
+
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
/* See address_space_translate: bit 0 is read, bit 1 is write. */
@@ -448,6 +476,206 @@ struct IOMMUMemoryRegionClass {
Error **errp);
};
+typedef struct RamDiscardListener RamDiscardListener;
+typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+struct RamDiscardListener {
+ /*
+ * @notify_populate:
+ *
+ * Notification that previously discarded memory is about to get populated.
+ * Listeners are able to object. If any listener objects, already
+ * successfully notified listeners are notified about a discard again.
+ *
+ * @rdl: the #RamDiscardListener getting notified
+ * @section: the #MemoryRegionSection to get populated. The section
+ * is aligned within the memory region to the minimum granularity
+ * unless it would exceed the registered section.
+ *
+ * Returns 0 on success. If the notification is rejected by the listener,
+ * an error is returned.
+ */
+ NotifyRamPopulate notify_populate;
+
+ /*
+ * @notify_discard:
+ *
+ * Notification that previously populated memory was discarded successfully
+ * and listeners should drop all references to such memory and prevent
+ * new population (e.g., unmap).
+ *
+ * @rdl: the #RamDiscardListener getting notified
+ * @section: the #MemoryRegionSection to get populated. The section
+ * is aligned within the memory region to the minimum granularity
+ * unless it would exceed the registered section.
+ */
+ NotifyRamDiscard notify_discard;
+
+ /*
+ * @double_discard_supported:
+ *
+ * The listener suppors getting @notify_discard notifications that span
+ * already discarded parts.
+ */
+ bool double_discard_supported;
+
+ MemoryRegionSection *section;
+ QLIST_ENTRY(RamDiscardListener) next;
+};
+
+static inline void ram_discard_listener_init(RamDiscardListener *rdl,
+ NotifyRamPopulate populate_fn,
+ NotifyRamDiscard discard_fn,
+ bool double_discard_supported)
+{
+ rdl->notify_populate = populate_fn;
+ rdl->notify_discard = discard_fn;
+ rdl->double_discard_supported = double_discard_supported;
+}
+
+typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
+
+/*
+ * RamDiscardManagerClass:
+ *
+ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
+ * regions are currently populated to be used/accessed by the VM, notifying
+ * after parts were discarded (freeing up memory) and before parts will be
+ * populated (consuming memory), to be used/acessed by the VM.
+ *
+ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
+ * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
+ * mapped.
+ *
+ * The #RamDiscardManager is intended to be used by technologies that are
+ * incompatible with discarding of RAM (e.g., VFIO, which may pin all
+ * memory inside a #MemoryRegion), and require proper coordination to only
+ * map the currently populated parts, to hinder parts that are expected to
+ * remain discarded from silently getting populated and consuming memory.
+ * Technologies that support discarding of RAM don't have to bother and can
+ * simply map the whole #MemoryRegion.
+ *
+ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
+ * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not
+ * access unplugged (discarded) memory - especially via DMA. virtio-mem will
+ * properly coordinate with listeners before memory is plugged (populated),
+ * and after memory is unplugged (discarded).
+ *
+ * Listeners are called in multiples of the minimum granularity (unless it
+ * would exceed the registered range) and changes are aligned to the minimum
+ * granularity within the #MemoryRegion. Listeners have to prepare for memory
+ * becomming discarded in a different granularity than it was populated and the
+ * other way around.
+ */
+struct RamDiscardManagerClass {
+ /* private */
+ InterfaceClass parent_class;
+
+ /* public */
+
+ /**
+ * @get_min_granularity:
+ *
+ * Get the minimum granularity in which listeners will get notified
+ * about changes within the #MemoryRegion via the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @mr: the #MemoryRegion
+ *
+ * Returns the minimum granularity.
+ */
+ uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
+ const MemoryRegion *mr);
+
+ /**
+ * @is_populated:
+ *
+ * Check whether the given #MemoryRegionSection is completely populated
+ * (i.e., no parts are currently discarded) via the #RamDiscardManager.
+ * There are no alignment requirements.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ *
+ * Returns whether the given range is completely populated.
+ */
+ bool (*is_populated)(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section);
+
+ /**
+ * @replay_populated:
+ *
+ * Call the #ReplayRamPopulate callback for all populated parts within the
+ * #MemoryRegionSection via the #RamDiscardManager.
+ *
+ * In case any call fails, no further calls are made.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamPopulate callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
+ int (*replay_populated)(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn, void *opaque);
+
+ /**
+ * @register_listener:
+ *
+ * Register a #RamDiscardListener for the given #MemoryRegionSection and
+ * immediately notify the #RamDiscardListener about all populated parts
+ * within the #MemoryRegionSection via the #RamDiscardManager.
+ *
+ * In case any notification fails, no further notifications are triggered
+ * and an error is logged.
+ *
+ * @rdm: the #RamDiscardManager
+ * @rdl: the #RamDiscardListener
+ * @section: the #MemoryRegionSection
+ */
+ void (*register_listener)(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+ /**
+ * @unregister_listener:
+ *
+ * Unregister a previously registered #RamDiscardListener via the
+ * #RamDiscardManager after notifying the #RamDiscardListener about all
+ * populated parts becoming unpopulated within the registered
+ * #MemoryRegionSection.
+ *
+ * @rdm: the #RamDiscardManager
+ * @rdl: the #RamDiscardListener
+ */
+ void (*unregister_listener)(RamDiscardManager *rdm,
+ RamDiscardListener *rdl);
+};
+
+uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr);
+
+bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section);
+
+int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn,
+ void *opaque);
+
+void ram_discard_manager_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section);
+
+void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl);
+
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
@@ -494,6 +722,7 @@ struct MemoryRegion {
const char *name;
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
+ RamDiscardManager *rdm; /* Only for RAM */
};
struct IOMMUMemoryRegion {
@@ -825,28 +1054,6 @@ typedef bool (*flatview_cb)(Int128 start,
*/
void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque);
-/**
- * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
- *
- * @mr: the region, or %NULL if empty
- * @fv: the flat view of the address space the region is mapped in
- * @offset_within_region: the beginning of the section, relative to @mr's start
- * @size: the size of the section; will not exceed @mr's boundaries
- * @offset_within_address_space: the address of the first byte of the section
- * relative to the region's address space
- * @readonly: writes to this section are ignored
- * @nonvolatile: this section is non-volatile
- */
-struct MemoryRegionSection {
- Int128 size;
- MemoryRegion *mr;
- FlatView *fv;
- hwaddr offset_within_region;
- hwaddr offset_within_address_space;
- bool readonly;
- bool nonvolatile;
-};
-
static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
MemoryRegionSection *b)
{
@@ -860,6 +1067,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
}
/**
+ * memory_region_section_new_copy: Copy a memory region section
+ *
+ * Allocate memory for a new copy, copy the memory region section, and
+ * properly take a reference on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to copy
+ */
+MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
+
+/**
+ * memory_region_section_new_copy: Free a copied memory region section
+ *
+ * Free a copy of a memory section created via memory_region_section_new_copy().
+ * properly dropping references on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to copy
+ */
+void memory_region_section_free_copy(MemoryRegionSection *s);
+
+/**
* memory_region_init: Initialize a memory region
*
* The region typically acts as a container for other memory regions. Use
@@ -2024,6 +2251,41 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
bool memory_region_is_mapped(MemoryRegion *mr);
/**
+ * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * The #RamDiscardManager cannot change while a memory region is mapped.
+ *
+ * @mr: the #MemoryRegion
+ */
+RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr);
+
+/**
+ * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
+ * #RamDiscardManager assigned
+ *
+ * @mr: the #MemoryRegion
+ */
+static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
+{
+ return !!memory_region_get_ram_discard_manager(mr);
+}
+
+/**
+ * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
+ * that does not cover RAM, or a #MemoryRegion that already has a
+ * #RamDiscardManager assigned.
+ *
+ * @mr: the #MemoryRegion
+ * @rdm: #RamDiscardManager to set
+ */
+void memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm);
+
+/**
* memory_region_find: translate an address/size relative to a
* MemoryRegion into a #MemoryRegionSection.
*
@@ -2632,6 +2894,12 @@ static inline MemOp devend_memop(enum device_endian end)
int ram_block_discard_disable(bool state);
/*
+ * See ram_block_discard_disable(): only disable uncoordinated discards,
+ * keeping coordinated discards (via the RamDiscardManager) enabled.
+ */
+int ram_block_uncoordinated_discard_disable(bool state);
+
+/*
* Inhibit technologies that disable discarding of pages in RAM blocks.
*
* Returns 0 if successful. Returns -EBUSY if discards are already set to
@@ -2640,12 +2908,20 @@ int ram_block_discard_disable(bool state);
int ram_block_discard_require(bool state);
/*
- * Test if discarding of memory in ram blocks is disabled.
+ * See ram_block_discard_require(): only inhibit technologies that disable
+ * uncoordinated discarding of pages in RAM blocks, allowing co-existance with
+ * technologies that only inhibit uncoordinated discards (via the
+ * RamDiscardManager).
+ */
+int ram_block_coordinated_discard_require(bool state);
+
+/*
+ * Test if any discarding of memory in ram blocks is disabled.
*/
bool ram_block_discard_is_disabled(void);
/*
- * Test if discarding of memory in ram blocks is required to work reliably.
+ * Test if any discarding of memory in ram blocks is required to work reliably.
*/
bool ram_block_discard_is_required(void);
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index c172cbe..5902c04 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -19,6 +19,7 @@
typedef struct BlockConf {
BlockBackend *blk;
+ OnOffAuto backend_defaults;
uint32_t physical_block_size;
uint32_t logical_block_size;
uint32_t min_io_size;
@@ -48,6 +49,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
}
#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \
+ DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \
+ _conf.backend_defaults, ON_OFF_AUTO_AUTO), \
DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
_conf.logical_block_size), \
DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f05219f..637652a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -12,6 +12,7 @@
#include "hw/ppc/spapr_xive.h" /* For SpaprXive */
#include "hw/ppc/xics.h" /* For ICSState */
#include "hw/ppc/spapr_tpm_proxy.h"
+#include "hw/ppc/vof.h"
struct SpaprVioBus;
struct SpaprPhbState;
@@ -74,8 +75,10 @@ typedef enum {
#define SPAPR_CAP_CCF_ASSIST 0x09
/* Implements PAPR FWNMI option */
#define SPAPR_CAP_FWNMI 0x0A
+/* Support H_RPT_INVALIDATE */
+#define SPAPR_CAP_RPT_INVALIDATE 0x0B
/* Num Caps */
-#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI + 1)
+#define SPAPR_CAP_NUM (SPAPR_CAP_RPT_INVALIDATE + 1)
/*
* Capability Values
@@ -180,6 +183,7 @@ struct SpaprMachineState {
uint64_t kernel_addr;
uint32_t initrd_base;
long initrd_size;
+ Vof *vof;
uint64_t rtc_offset; /* Now used only during incoming migration */
struct PPCTimebase tb;
bool has_graphics;
@@ -398,10 +402,13 @@ struct SpaprMachineState {
#define H_CPU_CHAR_THR_RECONF_TRIG PPC_BIT(6)
#define H_CPU_CHAR_CACHE_COUNT_DIS PPC_BIT(7)
#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST PPC_BIT(9)
+
#define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0)
#define H_CPU_BEHAV_L1D_FLUSH_PR PPC_BIT(1)
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR PPC_BIT(2)
#define H_CPU_BEHAV_FLUSH_COUNT_CACHE PPC_BIT(5)
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY PPC_BIT(7)
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS PPC_BIT(8)
/* Each control block has to be on a 4K boundary */
#define H_CB_ALIGNMENT 4096
@@ -542,8 +549,9 @@ struct SpaprMachineState {
#define H_SCM_UNBIND_MEM 0x3F0
#define H_SCM_UNBIND_ALL 0x3FC
#define H_SCM_HEALTH 0x400
+#define H_RPT_INVALIDATE 0x448
-#define MAX_HCALL_OPCODE H_SCM_HEALTH
+#define MAX_HCALL_OPCODE H_RPT_INVALIDATE
/* The hcalls above are standardized in PAPR and implemented by pHyp
* as well.
@@ -558,7 +566,9 @@ struct SpaprMachineState {
/* Client Architecture support */
#define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2)
#define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3)
-#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT
+/* 0x4 was used for KVMPPC_H_UPDATE_PHANDLE in SLOF */
+#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5)
+#define KVMPPC_HCALL_MAX KVMPPC_H_VOF_CLIENT
/*
* The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating
@@ -770,7 +780,7 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
#define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000))
#define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff)
-#define RTAS_SIZE 2048
+#define RTAS_MIN_SIZE 20 /* hv_rtas_size in SLOF */
#define RTAS_ERROR_LOG_MAX 2048
/* Offset from rtas-base where error log is placed */
@@ -932,6 +942,7 @@ extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
extern const VMStateDescription vmstate_spapr_cap_large_decr;
extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
extern const VMStateDescription vmstate_spapr_cap_fwnmi;
+extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
{
@@ -956,4 +967,16 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
hwaddr spapr_get_rtas_addr(void);
bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
+
+void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp);
+void spapr_vof_quiesce(MachineState *ms);
+bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname,
+ void *val, int vallen);
+target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args);
+target_ulong spapr_vof_client_architecture_support(MachineState *ms,
+ CPUState *cs,
+ target_ulong ovec_addr);
+void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
+
#endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/vof.h b/include/hw/ppc/vof.h
new file mode 100644
index 0000000..640be46
--- /dev/null
+++ b/include/hw/ppc/vof.h
@@ -0,0 +1,58 @@
+/*
+ * Virtual Open Firmware
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_VOF_H
+#define HW_VOF_H
+
+typedef struct Vof {
+ uint64_t top_addr; /* copied from rma_size */
+ GArray *claimed; /* array of SpaprOfClaimed */
+ uint64_t claimed_base;
+ GHashTable *of_instances; /* ihandle -> SpaprOfInstance */
+ uint32_t of_instance_last;
+ char *bootargs;
+ long fw_size;
+} Vof;
+
+int vof_client_call(MachineState *ms, Vof *vof, void *fdt,
+ target_ulong args_real);
+uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, uint64_t align);
+void vof_init(Vof *vof, uint64_t top_addr, Error **errp);
+void vof_cleanup(Vof *vof);
+void vof_build_dt(void *fdt, Vof *vof);
+uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename,
+ const char *prop, const char *path);
+
+#define TYPE_VOF_MACHINE_IF "vof-machine-if"
+
+typedef struct VofMachineIfClass VofMachineIfClass;
+DECLARE_CLASS_CHECKERS(VofMachineIfClass, VOF_MACHINE, TYPE_VOF_MACHINE_IF)
+
+struct VofMachineIfClass {
+ InterfaceClass parent;
+ target_ulong (*client_architecture_support)(MachineState *ms, CPUState *cs,
+ target_ulong vec);
+ void (*quiesce)(MachineState *ms);
+ bool (*setprop)(MachineState *ms, const char *path, const char *propname,
+ void *val, int vallen);
+};
+
+/*
+ * Initial stack size is from
+ * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#REF27292
+ *
+ * "Client programs shall be invoked with a valid stack pointer (r1) with
+ * at least 32K bytes of memory available for stack growth".
+ */
+#define VOF_STACK_SIZE 0x8000
+
+#define VOF_MEM_READ(pa, buf, size) \
+ address_space_read(&address_space_memory, \
+ (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size))
+#define VOF_MEM_WRITE(pa, buf, size) \
+ address_space_write(&address_space_memory, \
+ (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size))
+
+#endif /* HW_VOF_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 6141162..8af11b0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -88,9 +88,11 @@ typedef struct VFIOContainer {
uint64_t dirty_pgsizes;
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
+ unsigned int dma_max_mappings;
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
@@ -102,6 +104,16 @@ typedef struct VFIOGuestIOMMU {
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
} VFIOGuestIOMMU;
+typedef struct VFIORamDiscardListener {
+ VFIOContainer *container;
+ MemoryRegion *mr;
+ hwaddr offset_within_address_space;
+ hwaddr size;
+ uint64_t granularity;
+ RamDiscardListener listener;
+ QLIST_ENTRY(VFIORamDiscardListener) next;
+} VFIORamDiscardListener;
+
typedef struct VFIOHostDMAWindow {
hwaddr min_iova;
hwaddr max_iova;
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
index 4eeb82d..9a6e348 100644
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@@ -67,6 +67,9 @@ struct VirtIOMEM {
/* don't migrate unplugged memory */
NotifierWithReturn precopy_notifier;
+
+ /* listeners to notify on plug/unplug activity. */
+ QLIST_HEAD(, RamDiscardListener) rdl_list;
};
struct VirtIOMEMClass {
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 8df7b69..017c036 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -153,6 +153,7 @@ typedef enum {
MIG_PRI_DEFAULT = 0,
MIG_PRI_IOMMU, /* Must happen before PCI devices */
MIG_PRI_PCI_BUS, /* Must happen before IOMMU */
+ MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */
MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */
MIG_PRI_GICV3, /* Must happen before the ITS */
MIG_PRI_MAX,
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 98aef56..8dbc6fc 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -294,7 +294,9 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
void qemu_fd_register(int fd);
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+#define qemu_bh_new(cb, opaque) \
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)))
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
void qemu_bh_schedule_idle(QEMUBH *bh);
enum {
diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h
index 215d01b..204cfb8 100644
--- a/include/standard-headers/asm-x86/kvm_para.h
+++ b/include/standard-headers/asm-x86/kvm_para.h
@@ -33,6 +33,8 @@
#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_FEATURE_ASYNC_PF_INT 14
#define KVM_FEATURE_MSI_EXT_DEST_ID 15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE 16
+#define KVM_FEATURE_MIGRATION_CONTROL 17
#define KVM_HINTS_REALTIME 0
@@ -54,6 +56,7 @@
#define MSR_KVM_POLL_CONTROL 0x4b564d05
#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08
struct kvm_steal_time {
uint64_t steal;
@@ -90,6 +93,16 @@ struct kvm_clock_pairing {
/* MSR_KVM_ASYNC_PF_INT */
#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY (1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index a61ae52..352b51f 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -167,6 +167,13 @@ extern "C" {
#define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */
#define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */
+/* 64 bpp RGB */
+#define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */
+
+#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */
+
/*
* Floating point 64bpp RGB
* IEEE 754-2008 binary16 half-precision float
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 218d944..053d3fa 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -233,7 +233,7 @@ enum tunable_id {
ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
/*
* Add your fresh new tunable attribute above and remember to update
- * tunable_strings[] in net/core/ethtool.c
+ * tunable_strings[] in net/ethtool/common.c
*/
__ETHTOOL_TUNABLE_COUNT,
};
@@ -297,7 +297,7 @@ enum phy_tunable_id {
ETHTOOL_PHY_EDPD,
/*
* Add your fresh new phy tunable attribute above and remember to update
- * phy_tunable_strings[] in net/core/ethtool.c
+ * phy_tunable_strings[] in net/ethtool/common.c
*/
__ETHTOOL_PHY_TUNABLE_COUNT,
};
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index c403b9c..b5e86b4 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -611,6 +611,7 @@
#define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */
#define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */
#define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */
+#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */
#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
index f0c35ce..4fe842c 100644
--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
@@ -54,7 +54,7 @@
#define VIRTIO_ID_SOUND 25 /* virtio sound */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
-#define VIRTIO_ID_BT 28 /* virtio bluetooth */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_BT 40 /* virtio bluetooth */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h
index be44321..3a23488 100644
--- a/include/standard-headers/linux/virtio_vsock.h
+++ b/include/standard-headers/linux/virtio_vsock.h
@@ -38,6 +38,9 @@
#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_config.h"
+/* The feature bitmap for virtio vsock */
+#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */
+
struct virtio_vsock_config {
uint64_t guest_cid;
} QEMU_PACKED;
@@ -65,6 +68,7 @@ struct virtio_vsock_hdr {
enum virtio_vsock_type {
VIRTIO_VSOCK_TYPE_STREAM = 1,
+ VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
};
enum virtio_vsock_op {
@@ -91,4 +95,9 @@ enum virtio_vsock_shutdown {
VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
};
+/* VIRTIO_VSOCK_OP_RW flags values */
+enum virtio_vsock_rw {
+ VIRTIO_VSOCK_SEQ_EOR = 1,
+};
+
#endif /* _LINUX_VIRTIO_VSOCK_H */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index b6a0eaa..3d2ce99 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
+struct kvm_arm_copy_mte_tags {
+ __u64 guest_ipa;
+ __u64 length;
+ void *addr;
+ __u64 flags;
+ __u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST 0
+#define KVM_ARM_TAGS_FROM_GUEST 1
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index f94f65d..1567a32 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -72,6 +72,9 @@
#define MADV_COLD 20 /* deactivate these pages */
#define MADV_PAGEOUT 21 /* reclaim these pages */
+#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
+#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h
index 6de5a7f..f211961 100644
--- a/linux-headers/asm-generic/unistd.h
+++ b/linux-headers/asm-generic/unistd.h
@@ -863,8 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+#define __NR_quotactl_fd 443
+__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h
index 57dc2ac..40b210c 100644
--- a/linux-headers/asm-mips/mman.h
+++ b/linux-headers/asm-mips/mman.h
@@ -98,6 +98,9 @@
#define MADV_COLD 20 /* deactivate these pages */
#define MADV_PAGEOUT 21 /* reclaim these pages */
+#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
+#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h
index fce51fe..09cd297 100644
--- a/linux-headers/asm-mips/unistd_n32.h
+++ b/linux-headers/asm-mips/unistd_n32.h
@@ -372,6 +372,7 @@
#define __NR_process_madvise (__NR_Linux + 440)
#define __NR_epoll_pwait2 (__NR_Linux + 441)
#define __NR_mount_setattr (__NR_Linux + 442)
+#define __NR_quotactl_fd (__NR_Linux + 443)
#define __NR_landlock_create_ruleset (__NR_Linux + 444)
#define __NR_landlock_add_rule (__NR_Linux + 445)
#define __NR_landlock_restrict_self (__NR_Linux + 446)
diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h
index 0996001..780e0ce 100644
--- a/linux-headers/asm-mips/unistd_n64.h
+++ b/linux-headers/asm-mips/unistd_n64.h
@@ -348,6 +348,7 @@
#define __NR_process_madvise (__NR_Linux + 440)
#define __NR_epoll_pwait2 (__NR_Linux + 441)
#define __NR_mount_setattr (__NR_Linux + 442)
+#define __NR_quotactl_fd (__NR_Linux + 443)
#define __NR_landlock_create_ruleset (__NR_Linux + 444)
#define __NR_landlock_add_rule (__NR_Linux + 445)
#define __NR_landlock_restrict_self (__NR_Linux + 446)
diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h
index 954303a..06a2b3b 100644
--- a/linux-headers/asm-mips/unistd_o32.h
+++ b/linux-headers/asm-mips/unistd_o32.h
@@ -418,6 +418,7 @@
#define __NR_process_madvise (__NR_Linux + 440)
#define __NR_epoll_pwait2 (__NR_Linux + 441)
#define __NR_mount_setattr (__NR_Linux + 442)
+#define __NR_quotactl_fd (__NR_Linux + 443)
#define __NR_landlock_create_ruleset (__NR_Linux + 444)
#define __NR_landlock_add_rule (__NR_Linux + 445)
#define __NR_landlock_restrict_self (__NR_Linux + 446)
diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h
index 9155778..cd5a8a4 100644
--- a/linux-headers/asm-powerpc/unistd_32.h
+++ b/linux-headers/asm-powerpc/unistd_32.h
@@ -425,6 +425,7 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h
index 3cefa88..8458eff 100644
--- a/linux-headers/asm-powerpc/unistd_64.h
+++ b/linux-headers/asm-powerpc/unistd_64.h
@@ -397,6 +397,7 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
index e8cd343..0c3cd29 100644
--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
@@ -415,6 +415,7 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
index 86830e1..8dfc08b 100644
--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
@@ -363,6 +363,7 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 0662f64..a6c327f 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -159,6 +159,19 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
+struct kvm_sregs2 {
+ /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 flags;
+ __u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index 8f6ac8c..66e96c0 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNISTD_32_H
-#define _ASM_X86_UNISTD_32_H 1
+#ifndef _ASM_UNISTD_32_H
+#define _ASM_UNISTD_32_H
#define __NR_restart_syscall 0
#define __NR_exit 1
@@ -433,9 +433,10 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
-#endif /* _ASM_X86_UNISTD_32_H */
+#endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index bb187a9..b8ff6f1 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNISTD_64_H
-#define _ASM_X86_UNISTD_64_H 1
+#ifndef _ASM_UNISTD_64_H
+#define _ASM_UNISTD_64_H
#define __NR_read 0
#define __NR_write 1
@@ -355,9 +355,10 @@
#define __NR_process_madvise 440
#define __NR_epoll_pwait2 441
#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
#define __NR_landlock_create_ruleset 444
#define __NR_landlock_add_rule 445
#define __NR_landlock_restrict_self 446
-#endif /* _ASM_X86_UNISTD_64_H */
+#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index 4edd010..06a1097 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNISTD_X32_H
-#define _ASM_X86_UNISTD_X32_H 1
+#ifndef _ASM_UNISTD_X32_H
+#define _ASM_UNISTD_X32_H
#define __NR_read (__X32_SYSCALL_BIT + 0)
#define __NR_write (__X32_SYSCALL_BIT + 1)
@@ -308,6 +308,7 @@
#define __NR_process_madvise (__X32_SYSCALL_BIT + 440)
#define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441)
#define __NR_mount_setattr (__X32_SYSCALL_BIT + 442)
+#define __NR_quotactl_fd (__X32_SYSCALL_BIT + 443)
#define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444)
#define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445)
#define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446)
@@ -349,4 +350,4 @@
#define __NR_pwritev2 (__X32_SYSCALL_BIT + 547)
-#endif /* _ASM_X86_UNISTD_X32_H */
+#endif /* _ASM_UNISTD_X32_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 20d6a26..bcaf66c 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -280,6 +280,9 @@ struct kvm_xen_exit {
/* Encounter unexpected vm-exit reason */
#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4
+/* Flags that describe what fields in emulation_failure hold valid data. */
+#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
@@ -383,6 +386,25 @@ struct kvm_run {
__u32 ndata;
__u64 data[16];
} internal;
+ /*
+ * KVM_INTERNAL_ERROR_EMULATION
+ *
+ * "struct emulation_failure" is an overlay of "struct internal"
+ * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of
+ * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error
+ * sub-types, this struct is ABI! It also needs to be backwards
+ * compatible with "struct internal". Take special care that
+ * "ndata" is correct, that new fields are enumerated in "flags",
+ * and that each flag enumerates fields that are 64-bit aligned
+ * and sized (so that ndata+internal.data[] is valid/accurate).
+ */
+ struct {
+ __u32 suberror;
+ __u32 ndata;
+ __u64 flags;
+ __u8 insn_size;
+ __u8 insn_bytes[15];
+ } emulation_failure;
/* KVM_EXIT_OSI */
struct {
__u64 gprs[32];
@@ -1083,6 +1105,13 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SGX_ATTRIBUTE 196
#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
#define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_HYPERV_ENFORCE_CPUID 199
+#define KVM_CAP_SREGS2 200
+#define KVM_CAP_EXIT_HYPERCALL 201
+#define KVM_CAP_PPC_RPT_INVALIDATE 202
+#define KVM_CAP_BINARY_STATS_FD 203
+#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
+#define KVM_CAP_ARM_MTE 205
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1428,6 +1457,7 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
+#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
@@ -1621,6 +1651,9 @@ struct kvm_xen_hvm_attr {
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
+#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
+#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
+
struct kvm_xen_vcpu_attr {
__u16 type;
__u16 pad[3];
@@ -1899,4 +1932,76 @@ struct kvm_dirty_gfn {
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+/**
+ * struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
+ * @flags: Some extra information for header, always 0 for now.
+ * @name_size: The size in bytes of the memory which contains statistics
+ * name string including trailing '\0'. The memory is allocated
+ * at the send of statistics descriptor.
+ * @num_desc: The number of statistics the vm or vcpu has.
+ * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed
+ * by vm/vcpu stats fd.
+ * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file
+ * pointd by vm/vcpu stats fd.
+ * @data_offset: The offset of the vm/vcpu stats' data block in the file
+ * pointed by vm/vcpu stats fd.
+ *
+ * This is the header userspace needs to read from stats fd before any other
+ * readings. It is used by userspace to discover all the information about the
+ * vm/vcpu's binary statistics.
+ * Userspace reads this header from the start of the vm/vcpu's stats fd.
+ */
+struct kvm_stats_header {
+ __u32 flags;
+ __u32 name_size;
+ __u32 num_desc;
+ __u32 id_offset;
+ __u32 desc_offset;
+ __u32 data_offset;
+};
+
+#define KVM_STATS_TYPE_SHIFT 0
+#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK
+
+#define KVM_STATS_UNIT_SHIFT 4
+#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+
+#define KVM_STATS_BASE_SHIFT 8
+#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2
+
+/**
+ * struct kvm_stats_desc - Descriptor of a KVM statistics.
+ * @flags: Annotations of the stats, like type, unit, etc.
+ * @exponent: Used together with @flags to determine the unit.
+ * @size: The number of data items for this stats.
+ * Every data item is of type __u64.
+ * @offset: The offset of the stats to the start of stat structure in
+ * struture kvm or kvm_vcpu.
+ * @unused: Unused field for future usage. Always 0 for now.
+ * @name: The name string for the stats. Its size is indicated by the
+ * &kvm_stats_header->name_size.
+ */
+struct kvm_stats_desc {
+ __u32 flags;
+ __s16 exponent;
+ __u16 size;
+ __u32 offset;
+ __u32 unused;
+ char name[];
+};
+
+#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
+
#endif /* __LINUX_KVM_H */
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index b9ac97b..8479af5 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -31,7 +31,8 @@
UFFD_FEATURE_MISSING_SHMEM | \
UFFD_FEATURE_SIGBUS | \
UFFD_FEATURE_THREAD_ID | \
- UFFD_FEATURE_MINOR_HUGETLBFS)
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -80,8 +81,8 @@
struct uffdio_zeropage)
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
struct uffdio_writeprotect)
-#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \
- struct uffdio_continue)
+#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
/* read() structure */
struct uffd_msg {
@@ -185,6 +186,9 @@ struct uffdio_api {
* UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
* can be intercepted (via REGISTER_MODE_MINOR) for
* hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -196,6 +200,7 @@ struct uffdio_api {
#define UFFD_FEATURE_SIGBUS (1<<7)
#define UFFD_FEATURE_THREAD_ID (1<<8)
#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
__u64 features;
__u64 ioctls;
diff --git a/pc-bios/README b/pc-bios/README
index c101c9a..d344e3b 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -16,6 +16,10 @@
https://github.com/aik/SLOF, and the image currently in qemu is
built from git tag qemu-slof-20210217.
+- VOF (Virtual Open Firmware) is a minimalistic firmware to work with
+ -machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and
+ QEMU implements parts of the IEEE 1275 Open Firmware interface.
+
- sgabios (the Serial Graphics Adapter option ROM) provides a means for
legacy x86 software to communicate with an attached serial console as
if a video card were attached. The master sources reside in a subversion
diff --git a/pc-bios/u-boot.e500 b/pc-bios/u-boot.e500
index d2e29f8..8e635c8 100644
--- a/pc-bios/u-boot.e500
+++ b/pc-bios/u-boot.e500
Binary files differ
diff --git a/pc-bios/vof-nvram.bin b/pc-bios/vof-nvram.bin
new file mode 100644
index 0000000..d183901
--- /dev/null
+++ b/pc-bios/vof-nvram.bin
Binary files differ
diff --git a/pc-bios/vof.bin b/pc-bios/vof.bin
new file mode 100755
index 0000000..300cb7c
--- /dev/null
+++ b/pc-bios/vof.bin
Binary files differ
diff --git a/pc-bios/vof/Makefile b/pc-bios/vof/Makefile
new file mode 100644
index 0000000..aa1678c
--- /dev/null
+++ b/pc-bios/vof/Makefile
@@ -0,0 +1,23 @@
+all: build-all
+
+build-all: vof.bin
+
+CROSS ?=
+CC = $(CROSS)gcc
+LD = $(CROSS)ld
+OBJCOPY = $(CROSS)objcopy
+
+%.o: %.S
+ $(CC) -m32 -mbig-endian -mcpu=power4 -c -o $@ $<
+
+%.o: %.c
+ $(CC) -m32 -mbig-endian -mcpu=power4 -c -fno-stack-protector -o $@ $<
+
+vof.elf: entry.o main.o ci.o bootmem.o libc.o
+ $(LD) -nostdlib -e_start -Tvof.lds -EB -o $@ $^
+
+%.bin: %.elf
+ $(OBJCOPY) -O binary -j .text -j .data -j .toc -j .got2 $^ $@
+
+clean:
+ rm -f *.o vof.bin vof.elf *~
diff --git a/pc-bios/vof/bootmem.c b/pc-bios/vof/bootmem.c
new file mode 100644
index 0000000..771b9e9
--- /dev/null
+++ b/pc-bios/vof/bootmem.c
@@ -0,0 +1,14 @@
+#include "vof.h"
+
+void boot_from_memory(uint64_t initrd, uint64_t initrdsize)
+{
+ uint64_t kern[2];
+ phandle chosen = ci_finddevice("/chosen");
+
+ if (ci_getprop(chosen, "qemu,boot-kernel", kern, sizeof(kern)) !=
+ sizeof(kern)) {
+ return;
+ }
+
+ do_boot(kern[0], initrd, initrdsize);
+}
diff --git a/pc-bios/vof/ci.c b/pc-bios/vof/ci.c
new file mode 100644
index 0000000..fc4821b
--- /dev/null
+++ b/pc-bios/vof/ci.c
@@ -0,0 +1,91 @@
+#include "vof.h"
+
+struct prom_args {
+ uint32_t service;
+ uint32_t nargs;
+ uint32_t nret;
+ uint32_t args[10];
+};
+
+typedef unsigned long prom_arg_t;
+
+#define ADDR(x) (uint32_t)(x)
+
+static int prom_handle(struct prom_args *pargs)
+{
+ void *rtasbase;
+ uint32_t rtassize = 0;
+ phandle rtas;
+
+ if (strcmp("call-method", (void *)(unsigned long)pargs->service)) {
+ return -1;
+ }
+
+ if (strcmp("instantiate-rtas", (void *)(unsigned long)pargs->args[0])) {
+ return -1;
+ }
+
+ rtas = ci_finddevice("/rtas");
+ /* rtas-size is set by QEMU depending of FWNMI support */
+ ci_getprop(rtas, "rtas-size", &rtassize, sizeof(rtassize));
+ if (rtassize < hv_rtas_size) {
+ return -1;
+ }
+
+ rtasbase = (void *)(unsigned long) pargs->args[2];
+
+ memcpy(rtasbase, hv_rtas, hv_rtas_size);
+ pargs->args[pargs->nargs] = 0;
+ pargs->args[pargs->nargs + 1] = pargs->args[2];
+
+ return 0;
+}
+
+void prom_entry(uint32_t args)
+{
+ if (prom_handle((void *)(unsigned long) args)) {
+ ci_entry(args);
+ }
+}
+
+static int call_ci(const char *service, int nargs, int nret, ...)
+{
+ int i;
+ struct prom_args args;
+ va_list list;
+
+ args.service = ADDR(service);
+ args.nargs = nargs;
+ args.nret = nret;
+
+ va_start(list, nret);
+ for (i = 0; i < nargs; i++) {
+ args.args[i] = va_arg(list, prom_arg_t);
+ }
+ va_end(list);
+
+ for (i = 0; i < nret; i++) {
+ args.args[nargs + i] = 0;
+ }
+
+ if (ci_entry((uint32_t)(&args)) < 0) {
+ return -1;
+ }
+
+ return (nret > 0) ? args.args[nargs] : 0;
+}
+
+void ci_panic(const char *str)
+{
+ call_ci("exit", 0, 0);
+}
+
+phandle ci_finddevice(const char *path)
+{
+ return call_ci("finddevice", 1, 1, path);
+}
+
+uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len)
+{
+ return call_ci("getprop", 4, 1, ph, propname, prop, len);
+}
diff --git a/pc-bios/vof/entry.S b/pc-bios/vof/entry.S
new file mode 100644
index 0000000..10a101f
--- /dev/null
+++ b/pc-bios/vof/entry.S
@@ -0,0 +1,49 @@
+#define LOAD32(rn, name) \
+ lis rn,name##@h; \
+ ori rn,rn,name##@l
+
+#define ENTRY(func_name) \
+ .text; \
+ .align 2; \
+ .globl .func_name; \
+ .func_name: \
+ .globl func_name; \
+ func_name:
+
+#define KVMPPC_HCALL_BASE 0xf000
+#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5)
+
+ . = 0x100 /* Do exactly as SLOF does */
+
+ENTRY(_start)
+ LOAD32(2, __toc_start)
+ b entry_c
+
+ENTRY(_prom_entry)
+ LOAD32(2, __toc_start)
+ stwu %r1,-112(%r1)
+ stw %r31,104(%r1)
+ mflr %r31
+ bl prom_entry
+ nop
+ mtlr %r31
+ lwz %r31,104(%r1)
+ addi %r1,%r1,112
+ blr
+
+ENTRY(ci_entry)
+ mr 4,3
+ LOAD32(3,KVMPPC_H_VOF_CLIENT)
+ sc 1
+ blr
+
+/* This is the actual RTAS blob copied to the OS at instantiate-rtas */
+ENTRY(hv_rtas)
+ mr %r4,%r3
+ LOAD32(3,KVMPPC_H_RTAS)
+ sc 1
+ blr
+ .globl hv_rtas_size
+hv_rtas_size:
+ .long . - hv_rtas;
diff --git a/pc-bios/vof/libc.c b/pc-bios/vof/libc.c
new file mode 100644
index 0000000..fdbc30f
--- /dev/null
+++ b/pc-bios/vof/libc.c
@@ -0,0 +1,66 @@
+#include "vof.h"
+
+int strlen(const char *s)
+{
+ int len = 0;
+
+ while (*s != 0) {
+ len += 1;
+ s += 1;
+ }
+
+ return len;
+}
+
+int strcmp(const char *s1, const char *s2)
+{
+ while (*s1 != 0 && *s2 != 0) {
+ if (*s1 != *s2) {
+ break;
+ }
+ s1 += 1;
+ s2 += 1;
+ }
+
+ return *s1 - *s2;
+}
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ char *cdest;
+ const char *csrc = src;
+
+ cdest = dest;
+ while (n-- > 0) {
+ *cdest++ = *csrc++;
+ }
+
+ return dest;
+}
+
+int memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+ const unsigned char *p1 = ptr1;
+ const unsigned char *p2 = ptr2;
+
+ while (n-- > 0) {
+ if (*p1 != *p2) {
+ return *p1 - *p2;
+ }
+ p1 += 1;
+ p2 += 1;
+ }
+
+ return 0;
+}
+
+void *memset(void *dest, int c, size_t size)
+{
+ unsigned char *d = (unsigned char *)dest;
+
+ while (size-- > 0) {
+ *d++ = (unsigned char)c;
+ }
+
+ return dest;
+}
diff --git a/pc-bios/vof/main.c b/pc-bios/vof/main.c
new file mode 100644
index 0000000..0f0f6b4
--- /dev/null
+++ b/pc-bios/vof/main.c
@@ -0,0 +1,21 @@
+#include "vof.h"
+
+void do_boot(unsigned long addr, unsigned long _r3, unsigned long _r4)
+{
+ register unsigned long r3 __asm__("r3") = _r3;
+ register unsigned long r4 __asm__("r4") = _r4;
+ register unsigned long r5 __asm__("r5") = (unsigned long) _prom_entry;
+
+ ((void (*)(void))(uint32_t)addr)();
+}
+
+void entry_c(void)
+{
+ register unsigned long r3 __asm__("r3");
+ register unsigned long r4 __asm__("r4");
+ register unsigned long r5 __asm__("r5");
+ uint64_t initrd = r3, initrdsize = r4;
+
+ boot_from_memory(initrd, initrdsize);
+ ci_panic("*** No boot target ***\n");
+}
diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h
new file mode 100644
index 0000000..5f12c07
--- /dev/null
+++ b/pc-bios/vof/vof.h
@@ -0,0 +1,41 @@
+/*
+ * Virtual Open Firmware
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <stdarg.h>
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+#define NULL (0)
+typedef unsigned long ihandle;
+typedef unsigned long phandle;
+typedef int size_t;
+
+/* globals */
+extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */
+
+void do_boot(unsigned long addr, unsigned long r3, unsigned long r4);
+
+/* libc */
+int strlen(const char *s);
+int strcmp(const char *s1, const char *s2);
+void *memcpy(void *dest, const void *src, size_t n);
+int memcmp(const void *ptr1, const void *ptr2, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+void *memset(void *dest, int c, size_t size);
+
+/* CI wrappers */
+void ci_panic(const char *str);
+phandle ci_finddevice(const char *path);
+uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len);
+
+/* booting from -kernel */
+void boot_from_memory(uint64_t initrd, uint64_t initrdsize);
+
+/* Entry points for CI and RTAS */
+extern uint32_t ci_entry(uint32_t params);
+extern unsigned long hv_rtas(unsigned long params);
+extern unsigned int hv_rtas_size;
diff --git a/pc-bios/vof/vof.lds b/pc-bios/vof/vof.lds
new file mode 100644
index 0000000..1506ab4
--- /dev/null
+++ b/pc-bios/vof/vof.lds
@@ -0,0 +1,48 @@
+OUTPUT_FORMAT("elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+
+/* set the entry point */
+ENTRY ( __start )
+
+SECTIONS {
+ __executable_start = .;
+
+ .text : {
+ *(.text)
+ }
+
+ __etext = .;
+
+ . = ALIGN(8);
+
+ .data : {
+ *(.data)
+ *(.rodata .rodata.*)
+ *(.got1)
+ *(.sdata)
+ *(.opd)
+ }
+
+ /* FIXME bss at end ??? */
+
+ . = ALIGN(8);
+ __bss_start = .;
+ .bss : {
+ *(.sbss) *(.scommon)
+ *(.dynbss)
+ *(.bss)
+ }
+
+ . = ALIGN(8);
+ __bss_end = .;
+ __bss_size = (__bss_end - __bss_start);
+
+ . = ALIGN(256);
+ __toc_start = DEFINED (.TOC.) ? .TOC. : ADDR (.got) + 0x8000;
+ .got :
+ {
+ *(.toc .got)
+ }
+ . = ALIGN(8);
+ __toc_end = .;
+}
diff --git a/roms/u-boot b/roms/u-boot
-Subproject b46dd116ce03e235f2a7d4843c6278e1da44b5e
+Subproject 840658b093976390e9537724f802281c9c8439f
diff --git a/softmmu/memory.c b/softmmu/memory.c
index f016151..cea2f62 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -2027,6 +2027,70 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
return imrc->num_indexes(iommu_mr);
}
+RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
+{
+ if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+ return NULL;
+ }
+ return mr->rdm;
+}
+
+void memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm)
+{
+ g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+ g_assert(!rdm || !mr->rdm);
+ mr->rdm = rdm;
+}
+
+uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->get_min_granularity);
+ return rdmc->get_min_granularity(rdm, mr);
+}
+
+bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->is_populated);
+ return rdmc->is_populated(rdm, section);
+}
+
+int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn,
+ void *opaque)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->replay_populated);
+ return rdmc->replay_populated(rdm, section, replay_fn, opaque);
+}
+
+void ram_discard_manager_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->register_listener);
+ rdmc->register_listener(rdm, rdl, section);
+}
+
+void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->unregister_listener);
+ rdmc->unregister_listener(rdm, rdl);
+}
+
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
uint8_t mask = 1 << client;
@@ -2637,6 +2701,33 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
return ret;
}
+MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s)
+{
+ MemoryRegionSection *tmp = g_new(MemoryRegionSection, 1);
+
+ *tmp = *s;
+ if (tmp->mr) {
+ memory_region_ref(tmp->mr);
+ }
+ if (tmp->fv) {
+ bool ret = flatview_ref(tmp->fv);
+
+ g_assert(ret);
+ }
+ return tmp;
+}
+
+void memory_region_section_free_copy(MemoryRegionSection *s)
+{
+ if (s->fv) {
+ flatview_unref(s->fv);
+ }
+ if (s->mr) {
+ memory_region_unref(s->mr);
+ }
+ g_free(s);
+}
+
bool memory_region_present(MemoryRegion *container, hwaddr addr)
{
MemoryRegion *mr;
@@ -3320,10 +3411,17 @@ static const TypeInfo iommu_memory_region_info = {
.abstract = true,
};
+static const TypeInfo ram_discard_manager_info = {
+ .parent = TYPE_INTERFACE,
+ .name = TYPE_RAM_DISCARD_MANAGER,
+ .class_size = sizeof(RamDiscardManagerClass),
+};
+
static void memory_register_types(void)
{
type_register_static(&memory_region_info);
type_register_static(&iommu_memory_region_info);
+ type_register_static(&ram_discard_manager_info);
}
type_init(memory_register_types)
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 9b171c9..3c1912a 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -3684,56 +3684,106 @@ void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
}
}
-/*
- * If positive, discarding RAM is disabled. If negative, discarding RAM is
- * required to work and cannot be disabled.
- */
-static int ram_block_discard_disabled;
+/* Require any discards to work. */
+static unsigned int ram_block_discard_required_cnt;
+/* Require only coordinated discards to work. */
+static unsigned int ram_block_coordinated_discard_required_cnt;
+/* Disable any discards. */
+static unsigned int ram_block_discard_disabled_cnt;
+/* Disable only uncoordinated discards. */
+static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
+static QemuMutex ram_block_discard_disable_mutex;
+
+static void ram_block_discard_disable_mutex_lock(void)
+{
+ static gsize initialized;
+
+ if (g_once_init_enter(&initialized)) {
+ qemu_mutex_init(&ram_block_discard_disable_mutex);
+ g_once_init_leave(&initialized, 1);
+ }
+ qemu_mutex_lock(&ram_block_discard_disable_mutex);
+}
+
+static void ram_block_discard_disable_mutex_unlock(void)
+{
+ qemu_mutex_unlock(&ram_block_discard_disable_mutex);
+}
int ram_block_discard_disable(bool state)
{
- int old;
+ int ret = 0;
+ ram_block_discard_disable_mutex_lock();
if (!state) {
- qatomic_dec(&ram_block_discard_disabled);
- return 0;
+ ram_block_discard_disabled_cnt--;
+ } else if (ram_block_discard_required_cnt ||
+ ram_block_coordinated_discard_required_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_discard_disabled_cnt++;
}
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
- do {
- old = qatomic_read(&ram_block_discard_disabled);
- if (old < 0) {
- return -EBUSY;
- }
- } while (qatomic_cmpxchg(&ram_block_discard_disabled,
- old, old + 1) != old);
- return 0;
+int ram_block_uncoordinated_discard_disable(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_uncoordinated_discard_disabled_cnt--;
+ } else if (ram_block_discard_required_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_uncoordinated_discard_disabled_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
}
int ram_block_discard_require(bool state)
{
- int old;
+ int ret = 0;
+ ram_block_discard_disable_mutex_lock();
if (!state) {
- qatomic_inc(&ram_block_discard_disabled);
- return 0;
+ ram_block_discard_required_cnt--;
+ } else if (ram_block_discard_disabled_cnt ||
+ ram_block_uncoordinated_discard_disabled_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_discard_required_cnt++;
}
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
- do {
- old = qatomic_read(&ram_block_discard_disabled);
- if (old > 0) {
- return -EBUSY;
- }
- } while (qatomic_cmpxchg(&ram_block_discard_disabled,
- old, old - 1) != old);
- return 0;
+int ram_block_coordinated_discard_require(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_coordinated_discard_required_cnt--;
+ } else if (ram_block_discard_disabled_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_coordinated_discard_required_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
}
bool ram_block_discard_is_disabled(void)
{
- return qatomic_read(&ram_block_discard_disabled) > 0;
+ return qatomic_read(&ram_block_discard_disabled_cnt) ||
+ qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
}
bool ram_block_discard_is_required(void)
{
- return qatomic_read(&ram_block_discard_disabled) < 0;
+ return qatomic_read(&ram_block_discard_required_cnt) ||
+ qatomic_read(&ram_block_coordinated_discard_required_cnt);
}
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 9210e61..bb392f6 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -227,22 +227,20 @@ int cpu_get_dump_info(ArchDumpInfo *info,
const struct GuestPhysBlockList *guest_phys_blocks)
{
PowerPCCPU *cpu;
- PowerPCCPUClass *pcc;
if (first_cpu == NULL) {
return -1;
}
cpu = POWERPC_CPU(first_cpu);
- pcc = POWERPC_CPU_GET_CLASS(cpu);
info->d_machine = PPC_ELF_MACHINE;
info->d_class = ELFCLASS;
- if ((*pcc->interrupts_big_endian)(cpu)) {
- info->d_endian = ELFDATA2MSB;
- } else {
+ if (ppc_interrupts_little_endian(cpu)) {
info->d_endian = ELFDATA2LSB;
+ } else {
+ info->d_endian = ELFDATA2MSB;
}
/* 64KB is the max page size for pseries kernel */
if (strncmp(object_get_typename(qdev_get_machine()),
diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
index 06b6571..5800fa3 100644
--- a/target/ppc/cpu-qom.h
+++ b/target/ppc/cpu-qom.h
@@ -198,8 +198,6 @@ struct PowerPCCPUClass {
int n_host_threads;
void (*init_proc)(CPUPPCState *env);
int (*check_pow)(CPUPPCState *env);
- int (*handle_mmu_fault)(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx);
- bool (*interrupts_big_endian)(PowerPCCPU *cpu);
};
#ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
index 19d67b5..a292998 100644
--- a/target/ppc/cpu.c
+++ b/target/ppc/cpu.c
@@ -72,7 +72,7 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
{
PowerPCCPU *cpu = env_archcpu(env);
qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
- assert(!cpu->vhyp);
+ assert(!cpu->env.has_hv_mode || !cpu->vhyp);
#if defined(TARGET_PPC64)
if (mmu_is_64bit(env->mmu_model)) {
target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index b4de0db..93d308a 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2643,6 +2643,21 @@ static inline bool ppc_has_spr(PowerPCCPU *cpu, int spr)
return cpu->env.spr_cb[spr].name != NULL;
}
+static inline bool ppc_interrupts_little_endian(PowerPCCPU *cpu)
+{
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+
+ /*
+ * Only models that have an LPCR and know about LPCR_ILE can do little
+ * endian.
+ */
+ if (pcc->lpcr_mask & LPCR_ILE) {
+ return !!(cpu->env.spr[SPR_LPCR] & LPCR_ILE);
+ }
+
+ return false;
+}
+
void dump_mmu(CPUPPCState *env);
void ppc_maybe_bswap_register(CPUPPCState *env, uint8_t *mem_buf, int len);
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index d0411e7..505a0ed 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -2666,18 +2666,6 @@ static int check_pow_hid0_74xx(CPUPPCState *env)
return 0;
}
-static bool ppc_cpu_interrupts_big_endian_always(PowerPCCPU *cpu)
-{
- return true;
-}
-
-#ifdef TARGET_PPC64
-static bool ppc_cpu_interrupts_big_endian_lpcr(PowerPCCPU *cpu)
-{
- return !(cpu->env.spr[SPR_LPCR] & LPCR_ILE);
-}
-#endif
-
/*****************************************************************************/
/* PowerPC implementations definitions */
@@ -4578,9 +4566,6 @@ POWERPC_FAMILY(601)(ObjectClass *oc, void *data)
(1ull << MSR_IR) |
(1ull << MSR_DR);
pcc->mmu_model = POWERPC_MMU_601;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_601;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_601;
@@ -4623,9 +4608,6 @@ POWERPC_FAMILY(601v)(ObjectClass *oc, void *data)
(1ull << MSR_IR) |
(1ull << MSR_DR);
pcc->mmu_model = POWERPC_MMU_601;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_601;
pcc->flags = POWERPC_FLAG_SE | POWERPC_FLAG_RTC_CLK | POWERPC_FLAG_HID0_LE;
@@ -4889,9 +4871,6 @@ POWERPC_FAMILY(604)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_604;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_604;
@@ -4973,9 +4952,6 @@ POWERPC_FAMILY(604E)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_604;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_604;
@@ -5044,9 +5020,6 @@ POWERPC_FAMILY(740)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5124,9 +5097,6 @@ POWERPC_FAMILY(750)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5327,9 +5297,6 @@ POWERPC_FAMILY(750cl)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5410,9 +5377,6 @@ POWERPC_FAMILY(750cx)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5498,9 +5462,6 @@ POWERPC_FAMILY(750fx)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5586,9 +5547,6 @@ POWERPC_FAMILY(750gx)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_7x0;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_750;
@@ -5828,9 +5786,6 @@ POWERPC_FAMILY(7400)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_74xx;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_7400;
@@ -5914,9 +5869,6 @@ POWERPC_FAMILY(7410)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_74xx;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_7400;
@@ -6743,9 +6695,6 @@ POWERPC_FAMILY(e600)(ObjectClass *oc, void *data)
(1ull << MSR_RI) |
(1ull << MSR_LE);
pcc->mmu_model = POWERPC_MMU_32B;
-#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash32_handle_mmu_fault;
-#endif
pcc->excp_model = POWERPC_EXCP_74xx;
pcc->bus_model = PPC_FLAGS_INPUT_6xx;
pcc->bfd_mach = bfd_mach_ppc_7400;
@@ -7505,7 +7454,6 @@ POWERPC_FAMILY(970)(ObjectClass *oc, void *data)
(1ull << MSR_RI);
pcc->mmu_model = POWERPC_MMU_64B;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
pcc->hash64_opts = &ppc_hash64_opts_basic;
#endif
pcc->excp_model = POWERPC_EXCP_970;
@@ -7583,7 +7531,6 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
LPCR_RMI | LPCR_HDICE;
pcc->mmu_model = POWERPC_MMU_2_03;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
pcc->hash64_opts = &ppc_hash64_opts_basic;
pcc->lrg_decr_bits = 32;
#endif
@@ -7727,7 +7674,6 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
pcc->lpcr_pm = LPCR_P7_PECE0 | LPCR_P7_PECE1 | LPCR_P7_PECE2;
pcc->mmu_model = POWERPC_MMU_2_06;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
pcc->hash64_opts = &ppc_hash64_opts_POWER7;
pcc->lrg_decr_bits = 32;
#endif
@@ -7740,7 +7686,6 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
POWERPC_FLAG_VSX;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
- pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
static void init_proc_POWER8(CPUPPCState *env)
@@ -7904,7 +7849,6 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
LPCR_P8_PECE3 | LPCR_P8_PECE4;
pcc->mmu_model = POWERPC_MMU_2_07;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
pcc->hash64_opts = &ppc_hash64_opts_POWER7;
pcc->lrg_decr_bits = 32;
pcc->n_host_threads = 8;
@@ -7918,7 +7862,6 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
- pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
#ifdef CONFIG_SOFTMMU
@@ -8120,7 +8063,6 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
pcc->lpcr_pm = LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE;
pcc->mmu_model = POWERPC_MMU_3_00;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc64_v3_handle_mmu_fault;
/* segment page size remain the same */
pcc->hash64_opts = &ppc_hash64_opts_POWER7;
pcc->radix_page_info = &POWER9_radix_page_info;
@@ -8136,7 +8078,6 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
- pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
#ifdef CONFIG_SOFTMMU
@@ -8332,7 +8273,6 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
pcc->lpcr_pm = LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE;
pcc->mmu_model = POWERPC_MMU_3_00;
#if defined(CONFIG_SOFTMMU)
- pcc->handle_mmu_fault = ppc64_v3_handle_mmu_fault;
/* segment page size remain the same */
pcc->hash64_opts = &ppc_hash64_opts_POWER7;
pcc->radix_page_info = &POWER10_radix_page_info;
@@ -8347,7 +8287,6 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
- pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
#if !defined(CONFIG_USER_ONLY)
@@ -8908,9 +8847,11 @@ static void ppc_cpu_reset(DeviceState *dev)
#if !defined(CONFIG_USER_ONLY)
env->nip = env->hreset_vector | env->excp_prefix;
+#if defined(CONFIG_TCG)
if (env->mmu_model != POWERPC_MMU_REAL) {
ppc_tlb_invalidate_all(env);
}
+#endif /* CONFIG_TCG */
#endif
hreg_compute_hflags(env);
@@ -9094,7 +9035,6 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
device_class_set_parent_unrealize(dc, ppc_cpu_unrealize,
&pcc->parent_unrealize);
pcc->pvr_match = ppc_pvr_match_default;
- pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_always;
device_class_set_props(dc, ppc_cpu_properties);
device_class_set_parent_reset(dc, ppc_cpu_reset, &pcc->parent_reset);
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index fd147e2..a79a0ed 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1099,7 +1099,6 @@ void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
- PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
target_ulong msr = 0;
/*
@@ -1108,7 +1107,7 @@ void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector)
*/
msr = (1ULL << MSR_ME);
msr |= env->msr & (1ULL << MSR_SF);
- if (!(*pcc->interrupts_big_endian)(cpu)) {
+ if (ppc_interrupts_little_endian(cpu)) {
msr |= (1ULL << MSR_LE);
}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 104a308..dc93b99 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist;
static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
static int cap_fwnmi;
+static int cap_rpt_invalidate;
static uint32_t debug_inst_opcode;
@@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
exit(1);
}
+ cap_rpt_invalidate = kvm_vm_check_extension(s, KVM_CAP_PPC_RPT_INVALIDATE);
kvm_ppc_register_host_cpu_type();
return 0;
@@ -2040,6 +2042,11 @@ void kvmppc_enable_h_page_init(void)
kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
}
+void kvmppc_enable_h_rpt_invalidate(void)
+{
+ kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE);
+}
+
void kvmppc_set_papr(PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
@@ -2551,6 +2558,11 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
return 0;
}
+int kvmppc_has_cap_rpt_invalidate(void)
+{
+ return cap_rpt_invalidate;
+}
+
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
uint32_t host_pvr = mfpvr();
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 989f61a..ee9325b 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -24,6 +24,7 @@ void kvmppc_enable_logical_ci_hcalls(void);
void kvmppc_enable_set_mode_hcall(void);
void kvmppc_enable_clear_ref_mod_hcalls(void);
void kvmppc_enable_h_page_init(void);
+void kvmppc_enable_h_rpt_invalidate(void);
void kvmppc_set_papr(PowerPCCPU *cpu);
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
@@ -71,6 +72,7 @@ bool kvmppc_has_cap_nested_kvm_hv(void);
int kvmppc_set_cap_nested_kvm_hv(int enable);
int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
+int kvmppc_has_cap_rpt_invalidate(void);
int kvmppc_enable_hwrng(void);
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
@@ -150,6 +152,11 @@ static inline void kvmppc_enable_h_page_init(void)
{
}
+static inline void kvmppc_enable_h_rpt_invalidate(void)
+{
+ g_assert_not_reached();
+}
+
static inline void kvmppc_set_papr(PowerPCCPU *cpu)
{
}
@@ -381,6 +388,11 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
return -1;
}
+static inline int kvmppc_has_cap_rpt_invalidate(void)
+{
+ return false;
+}
+
static inline int kvmppc_enable_hwrng(void)
{
return -1;
diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
index c78fd8d..f4985ba 100644
--- a/target/ppc/mmu-book3s-v3.c
+++ b/target/ppc/mmu-book3s-v3.c
@@ -23,25 +23,6 @@
#include "mmu-book3s-v3.h"
#include "mmu-radix64.h"
-int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
- int mmu_idx)
-{
- if (ppc64_v3_radix(cpu)) { /* Guest uses radix */
- return ppc_radix64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx);
- } else { /* Guest uses hash */
- return ppc_hash64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx);
- }
-}
-
-hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr)
-{
- if (ppc64_v3_radix(cpu)) {
- return ppc_radix64_get_phys_page_debug(cpu, eaddr);
- } else {
- return ppc_hash64_get_phys_page_debug(cpu, eaddr);
- }
-}
-
bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry)
{
uint64_t patb = cpu->env.spr[SPR_PTCR] & PTCR_PATB;
diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index 7b89be5..d6d5ed8 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h
@@ -21,6 +21,7 @@
#define PPC_MMU_BOOK3S_V3_H
#include "mmu-hash64.h"
+#include "mmu-books.h"
#ifndef CONFIG_USER_ONLY
@@ -67,11 +68,6 @@ static inline bool ppc64_v3_radix(PowerPCCPU *cpu)
return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
}
-hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr);
-
-int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
- int mmu_idx);
-
static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
{
uint64_t base;
diff --git a/target/ppc/mmu-books.h b/target/ppc/mmu-books.h
new file mode 100644
index 0000000..0d12551
--- /dev/null
+++ b/target/ppc/mmu-books.h
@@ -0,0 +1,30 @@
+/*
+ * PowerPC BookS emulation generic mmu definitions for qemu.
+ *
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_MMU_BOOKS_H
+#define PPC_MMU_BOOKS_H
+
+/*
+ * These correspond to the mmu_idx values computed in
+ * hreg_compute_hflags_value. See the tables therein
+ */
+static inline bool mmuidx_pr(int idx) { return !(idx & 1); }
+static inline bool mmuidx_real(int idx) { return idx & 2; }
+static inline bool mmuidx_hv(int idx) { return idx & 4; }
+#endif /* PPC_MMU_BOOKS_H */
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 9f0a497..3957aab 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -25,9 +25,10 @@
#include "kvm_ppc.h"
#include "internal.h"
#include "mmu-hash32.h"
+#include "mmu-books.h"
#include "exec/log.h"
-/* #define DEBUG_BAT */
+/* #define DEBUG_BATS */
#ifdef DEBUG_BATS
# define LOG_BATS(...) qemu_log_mask(CPU_LOG_MMU, __VA_ARGS__)
@@ -86,25 +87,22 @@ static int ppc_hash32_pp_prot(int key, int pp, int nx)
return prot;
}
-static int ppc_hash32_pte_prot(PowerPCCPU *cpu,
+static int ppc_hash32_pte_prot(int mmu_idx,
target_ulong sr, ppc_hash_pte32_t pte)
{
- CPUPPCState *env = &cpu->env;
unsigned pp, key;
- key = !!(msr_pr ? (sr & SR32_KP) : (sr & SR32_KS));
+ key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
pp = pte.pte1 & HPTE32_R_PP;
return ppc_hash32_pp_prot(key, pp, !!(sr & SR32_NX));
}
-static target_ulong hash32_bat_size(PowerPCCPU *cpu,
+static target_ulong hash32_bat_size(int mmu_idx,
target_ulong batu, target_ulong batl)
{
- CPUPPCState *env = &cpu->env;
-
- if ((msr_pr && !(batu & BATU32_VP))
- || (!msr_pr && !(batu & BATU32_VS))) {
+ if ((mmuidx_pr(mmu_idx) && !(batu & BATU32_VP))
+ || (!mmuidx_pr(mmu_idx) && !(batu & BATU32_VS))) {
return 0;
}
@@ -137,14 +135,13 @@ static target_ulong hash32_bat_601_size(PowerPCCPU *cpu,
return BATU32_BEPI & ~((batl & BATL32_601_BL) << 17);
}
-static int hash32_bat_601_prot(PowerPCCPU *cpu,
+static int hash32_bat_601_prot(int mmu_idx,
target_ulong batu, target_ulong batl)
{
- CPUPPCState *env = &cpu->env;
int key, pp;
pp = batu & BATU32_601_PP;
- if (msr_pr == 0) {
+ if (mmuidx_pr(mmu_idx) == 0) {
key = !!(batu & BATU32_601_KS);
} else {
key = !!(batu & BATU32_601_KP);
@@ -153,7 +150,8 @@ static int hash32_bat_601_prot(PowerPCCPU *cpu,
}
static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
- MMUAccessType access_type, int *prot)
+ MMUAccessType access_type, int *prot,
+ int mmu_idx)
{
CPUPPCState *env = &cpu->env;
target_ulong *BATlt, *BATut;
@@ -177,7 +175,7 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
if (unlikely(env->mmu_model == POWERPC_MMU_601)) {
mask = hash32_bat_601_size(cpu, batu, batl);
} else {
- mask = hash32_bat_size(cpu, batu, batl);
+ mask = hash32_bat_size(mmu_idx, batu, batl);
}
LOG_BATS("%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx
" BATl " TARGET_FMT_lx "\n", __func__,
@@ -187,7 +185,7 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
hwaddr raddr = (batl & mask) | (ea & ~mask);
if (unlikely(env->mmu_model == POWERPC_MMU_601)) {
- *prot = hash32_bat_601_prot(cpu, batu, batl);
+ *prot = hash32_bat_601_prot(mmu_idx, batu, batl);
} else {
*prot = hash32_bat_prot(cpu, batu, batl);
}
@@ -199,6 +197,9 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
/* No hit */
#if defined(DEBUG_BATS)
if (qemu_log_enabled()) {
+ target_ulong *BATu, *BATl;
+ target_ulong BEPIl, BEPIu, bl;
+
LOG_BATS("no BAT match for " TARGET_FMT_lx ":\n", ea);
for (i = 0; i < 4; i++) {
BATu = &BATut[i];
@@ -218,14 +219,15 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
return -1;
}
-static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
- target_ulong eaddr,
- MMUAccessType access_type,
- hwaddr *raddr, int *prot)
+static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
+ target_ulong eaddr,
+ MMUAccessType access_type,
+ hwaddr *raddr, int *prot, int mmu_idx,
+ bool guest_visible)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- int key = !!(msr_pr ? (sr & SR32_KP) : (sr & SR32_KS));
+ int key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
@@ -238,17 +240,23 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
*/
*raddr = ((sr & 0xF) << 28) | (eaddr & 0x0FFFFFFF);
*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- return 0;
+ return true;
}
if (access_type == MMU_INST_FETCH) {
/* No code fetch is allowed in direct-store areas */
- cs->exception_index = POWERPC_EXCP_ISI;
- env->error_code = 0x10000000;
- return 1;
+ if (guest_visible) {
+ cs->exception_index = POWERPC_EXCP_ISI;
+ env->error_code = 0x10000000;
+ }
+ return false;
}
- switch (env->access_type) {
+ /*
+ * From ppc_cpu_get_phys_page_debug, env->access_type is not set.
+ * Assume ACCESS_INT for that case.
+ */
+ switch (guest_visible ? env->access_type : ACCESS_INT) {
case ACCESS_INT:
/* Integer load/store : only access allowed */
break;
@@ -257,7 +265,7 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
cs->exception_index = POWERPC_EXCP_ALIGN;
env->error_code = POWERPC_EXCP_ALIGN_FP;
env->spr[SPR_DAR] = eaddr;
- return 1;
+ return false;
case ACCESS_RES:
/* lwarx, ldarx or srwcx. */
env->error_code = 0;
@@ -267,7 +275,7 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
} else {
env->spr[SPR_DSISR] = 0x04000000;
}
- return 1;
+ return false;
case ACCESS_CACHE:
/*
* dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi
@@ -276,7 +284,7 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
* no-op, it's quite easy :-)
*/
*raddr = eaddr;
- return 0;
+ return true;
case ACCESS_EXT:
/* eciwx or ecowx */
cs->exception_index = POWERPC_EXCP_DSI;
@@ -287,16 +295,18 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
} else {
env->spr[SPR_DSISR] = 0x04100000;
}
- return 1;
+ return false;
default:
- cpu_abort(cs, "ERROR: instruction should not need "
- "address translation\n");
+ cpu_abort(cs, "ERROR: insn should not need address translation\n");
}
- if ((access_type == MMU_DATA_STORE || key != 1) &&
- (access_type == MMU_DATA_LOAD || key != 0)) {
+
+ *prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ;
+ if (*prot & prot_for_access_type(access_type)) {
*raddr = eaddr;
- return 0;
- } else {
+ return true;
+ }
+
+ if (guest_visible) {
cs->exception_index = POWERPC_EXCP_DSI;
env->error_code = 0;
env->spr[SPR_DAR] = eaddr;
@@ -305,8 +315,8 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
} else {
env->spr[SPR_DSISR] = 0x08000000;
}
- return 1;
}
+ return false;
}
hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
@@ -415,8 +425,9 @@ static hwaddr ppc_hash32_pte_raddr(target_ulong sr, ppc_hash_pte32_t pte,
return (rpn & ~mask) | (eaddr & mask);
}
-int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
- int mmu_idx)
+bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
@@ -425,48 +436,45 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
ppc_hash_pte32_t pte;
int prot;
int need_prot;
- MMUAccessType access_type;
hwaddr raddr;
- assert((rwx == 0) || (rwx == 1) || (rwx == 2));
- access_type = rwx;
- need_prot = prot_for_access_type(access_type);
+ /* There are no hash32 large pages. */
+ *psizep = TARGET_PAGE_BITS;
/* 1. Handle real mode accesses */
- if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
+ if (mmuidx_real(mmu_idx)) {
/* Translation is off */
- raddr = eaddr;
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
- TARGET_PAGE_SIZE);
- return 0;
+ *raddrp = eaddr;
+ *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ return true;
}
+ need_prot = prot_for_access_type(access_type);
+
/* 2. Check Block Address Translation entries (BATs) */
if (env->nb_BATs != 0) {
- raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, &prot);
+ raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, mmu_idx);
if (raddr != -1) {
- if (need_prot & ~prot) {
- if (access_type == MMU_INST_FETCH) {
- cs->exception_index = POWERPC_EXCP_ISI;
- env->error_code = 0x08000000;
- } else {
- cs->exception_index = POWERPC_EXCP_DSI;
- env->error_code = 0;
- env->spr[SPR_DAR] = eaddr;
- if (access_type == MMU_DATA_STORE) {
- env->spr[SPR_DSISR] = 0x0a000000;
+ if (need_prot & ~*protp) {
+ if (guest_visible) {
+ if (access_type == MMU_INST_FETCH) {
+ cs->exception_index = POWERPC_EXCP_ISI;
+ env->error_code = 0x08000000;
} else {
- env->spr[SPR_DSISR] = 0x08000000;
+ cs->exception_index = POWERPC_EXCP_DSI;
+ env->error_code = 0;
+ env->spr[SPR_DAR] = eaddr;
+ if (access_type == MMU_DATA_STORE) {
+ env->spr[SPR_DSISR] = 0x0a000000;
+ } else {
+ env->spr[SPR_DSISR] = 0x08000000;
+ }
}
}
- return 1;
+ return false;
}
-
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK,
- raddr & TARGET_PAGE_MASK, prot, mmu_idx,
- TARGET_PAGE_SIZE);
- return 0;
+ *raddrp = raddr;
+ return true;
}
}
@@ -475,67 +483,65 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
/* 4. Handle direct store segments */
if (sr & SR32_T) {
- if (ppc_hash32_direct_store(cpu, sr, eaddr, access_type,
- &raddr, &prot) == 0) {
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK,
- raddr & TARGET_PAGE_MASK, prot, mmu_idx,
- TARGET_PAGE_SIZE);
- return 0;
- } else {
- return 1;
- }
+ return ppc_hash32_direct_store(cpu, sr, eaddr, access_type,
+ raddrp, protp, mmu_idx, guest_visible);
}
/* 5. Check for segment level no-execute violation */
if (access_type == MMU_INST_FETCH && (sr & SR32_NX)) {
- cs->exception_index = POWERPC_EXCP_ISI;
- env->error_code = 0x10000000;
- return 1;
+ if (guest_visible) {
+ cs->exception_index = POWERPC_EXCP_ISI;
+ env->error_code = 0x10000000;
+ }
+ return false;
}
/* 6. Locate the PTE in the hash table */
pte_offset = ppc_hash32_htab_lookup(cpu, sr, eaddr, &pte);
if (pte_offset == -1) {
- if (access_type == MMU_INST_FETCH) {
- cs->exception_index = POWERPC_EXCP_ISI;
- env->error_code = 0x40000000;
- } else {
- cs->exception_index = POWERPC_EXCP_DSI;
- env->error_code = 0;
- env->spr[SPR_DAR] = eaddr;
- if (access_type == MMU_DATA_STORE) {
- env->spr[SPR_DSISR] = 0x42000000;
+ if (guest_visible) {
+ if (access_type == MMU_INST_FETCH) {
+ cs->exception_index = POWERPC_EXCP_ISI;
+ env->error_code = 0x40000000;
} else {
- env->spr[SPR_DSISR] = 0x40000000;
+ cs->exception_index = POWERPC_EXCP_DSI;
+ env->error_code = 0;
+ env->spr[SPR_DAR] = eaddr;
+ if (access_type == MMU_DATA_STORE) {
+ env->spr[SPR_DSISR] = 0x42000000;
+ } else {
+ env->spr[SPR_DSISR] = 0x40000000;
+ }
}
}
-
- return 1;
+ return false;
}
qemu_log_mask(CPU_LOG_MMU,
"found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
/* 7. Check access permissions */
- prot = ppc_hash32_pte_prot(cpu, sr, pte);
+ prot = ppc_hash32_pte_prot(mmu_idx, sr, pte);
if (need_prot & ~prot) {
/* Access right violation */
qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
- if (access_type == MMU_INST_FETCH) {
- cs->exception_index = POWERPC_EXCP_ISI;
- env->error_code = 0x08000000;
- } else {
- cs->exception_index = POWERPC_EXCP_DSI;
- env->error_code = 0;
- env->spr[SPR_DAR] = eaddr;
- if (access_type == MMU_DATA_STORE) {
- env->spr[SPR_DSISR] = 0x0a000000;
+ if (guest_visible) {
+ if (access_type == MMU_INST_FETCH) {
+ cs->exception_index = POWERPC_EXCP_ISI;
+ env->error_code = 0x08000000;
} else {
- env->spr[SPR_DSISR] = 0x08000000;
+ cs->exception_index = POWERPC_EXCP_DSI;
+ env->error_code = 0;
+ env->spr[SPR_DAR] = eaddr;
+ if (access_type == MMU_DATA_STORE) {
+ env->spr[SPR_DSISR] = 0x0a000000;
+ } else {
+ env->spr[SPR_DSISR] = 0x08000000;
+ }
}
}
- return 1;
+ return false;
}
qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
@@ -559,45 +565,7 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
/* 9. Determine the real address from the PTE */
- raddr = ppc_hash32_pte_raddr(sr, pte, eaddr);
-
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- prot, mmu_idx, TARGET_PAGE_SIZE);
-
- return 0;
-}
-
-hwaddr ppc_hash32_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr)
-{
- CPUPPCState *env = &cpu->env;
- target_ulong sr;
- hwaddr pte_offset;
- ppc_hash_pte32_t pte;
- int prot;
-
- if (msr_dr == 0) {
- /* Translation is off */
- return eaddr;
- }
-
- if (env->nb_BATs != 0) {
- hwaddr raddr = ppc_hash32_bat_lookup(cpu, eaddr, 0, &prot);
- if (raddr != -1) {
- return raddr;
- }
- }
-
- sr = env->sr[eaddr >> 28];
-
- if (sr & SR32_T) {
- /* FIXME: Add suitable debug support for Direct Store segments */
- return -1;
- }
-
- pte_offset = ppc_hash32_htab_lookup(cpu, sr, eaddr, &pte);
- if (pte_offset == -1) {
- return -1;
- }
-
- return ppc_hash32_pte_raddr(sr, pte, eaddr) & TARGET_PAGE_MASK;
+ *raddrp = ppc_hash32_pte_raddr(sr, pte, eaddr);
+ *protp = prot;
+ return true;
}
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 898021f..3892b69 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -4,9 +4,9 @@
#ifndef CONFIG_USER_ONLY
hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash);
-hwaddr ppc_hash32_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
-int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
- int mmu_idx);
+bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible);
/*
* Segment register definitions
@@ -22,6 +22,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
* Block Address Translation (BAT) definitions
*/
+#define BATU32_BEPIU 0xf0000000
+#define BATU32_BEPIL 0x0ffe0000
#define BATU32_BEPI 0xfffe0000
#define BATU32_BL 0x00001ffc
#define BATU32_VS 0x00000002
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 708dffc..19832c4 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -366,10 +366,9 @@ static inline int ppc_hash64_pte_noexec_guard(PowerPCCPU *cpu,
}
/* Check Basic Storage Protection */
-static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
+static int ppc_hash64_pte_prot(int mmu_idx,
ppc_slb_t *slb, ppc_hash_pte64_t pte)
{
- CPUPPCState *env = &cpu->env;
unsigned pp, key;
/*
* Some pp bit combinations have undefined behaviour, so default
@@ -377,7 +376,7 @@ static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
*/
int prot = 0;
- key = !!(msr_pr ? (slb->vsid & SLB_VSID_KP)
+ key = !!(mmuidx_pr(mmu_idx) ? (slb->vsid & SLB_VSID_KP)
: (slb->vsid & SLB_VSID_KS));
pp = (pte.pte1 & HPTE64_R_PP) | ((pte.pte1 & HPTE64_R_PP0) >> 61);
@@ -744,17 +743,17 @@ static bool ppc_hash64_use_vrma(CPUPPCState *env)
}
}
-static void ppc_hash64_set_isi(CPUState *cs, uint64_t error_code)
+static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
{
CPUPPCState *env = &POWERPC_CPU(cs)->env;
bool vpm;
- if (msr_ir) {
+ if (!mmuidx_real(mmu_idx)) {
vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
} else {
vpm = ppc_hash64_use_vrma(env);
}
- if (vpm && !msr_hv) {
+ if (vpm && !mmuidx_hv(mmu_idx)) {
cs->exception_index = POWERPC_EXCP_HISI;
} else {
cs->exception_index = POWERPC_EXCP_ISI;
@@ -762,17 +761,17 @@ static void ppc_hash64_set_isi(CPUState *cs, uint64_t error_code)
env->error_code = error_code;
}
-static void ppc_hash64_set_dsi(CPUState *cs, uint64_t dar, uint64_t dsisr)
+static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t dsisr)
{
CPUPPCState *env = &POWERPC_CPU(cs)->env;
bool vpm;
- if (msr_dr) {
+ if (!mmuidx_real(mmu_idx)) {
vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1);
} else {
vpm = ppc_hash64_use_vrma(env);
}
- if (vpm && !msr_hv) {
+ if (vpm && !mmuidx_hv(mmu_idx)) {
cs->exception_index = POWERPC_EXCP_HDSI;
env->spr[SPR_HDAR] = dar;
env->spr[SPR_HDSISR] = dsisr;
@@ -873,8 +872,9 @@ static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
return -1;
}
-int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
- int rwx, int mmu_idx)
+bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
@@ -884,13 +884,9 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
hwaddr ptex;
ppc_hash_pte64_t pte;
int exec_prot, pp_prot, amr_prot, prot;
- MMUAccessType access_type;
int need_prot;
hwaddr raddr;
- assert((rwx == 0) || (rwx == 1) || (rwx == 2));
- access_type = rwx;
-
/*
* Note on LPCR usage: 970 uses HID4, but our special variant of
* store_spr copies relevant fields into env->spr[SPR_LPCR].
@@ -900,7 +896,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
*/
/* 1. Handle real mode accesses */
- if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
+ if (mmuidx_real(mmu_idx)) {
/*
* Translation is supposedly "off", but in real mode the top 4
* effective address bits are (mostly) ignored
@@ -912,7 +908,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
* In virtual hypervisor mode, there's nothing to do:
* EA == GPA == qemu guest address
*/
- } else if (msr_hv || !env->has_hv_mode) {
+ } else if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
/* In HV mode, add HRMOR if top EA bit is clear */
if (!(eaddr >> 63)) {
raddr |= env->spr[SPR_HRMOR];
@@ -922,9 +918,11 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
slb = &vrma_slbe;
if (build_vrma_slbe(cpu, slb) != 0) {
/* Invalid VRMA setup, machine check */
- cs->exception_index = POWERPC_EXCP_MCHECK;
- env->error_code = 0;
- return 1;
+ if (guest_visible) {
+ cs->exception_index = POWERPC_EXCP_MCHECK;
+ env->error_code = 0;
+ }
+ return false;
}
goto skip_slb_search;
@@ -933,29 +931,33 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
/* Emulated old-style RMO mode, bounds check against RMLS */
if (raddr >= limit) {
+ if (!guest_visible) {
+ return false;
+ }
switch (access_type) {
case MMU_INST_FETCH:
- ppc_hash64_set_isi(cs, SRR1_PROTFAULT);
+ ppc_hash64_set_isi(cs, mmu_idx, SRR1_PROTFAULT);
break;
case MMU_DATA_LOAD:
- ppc_hash64_set_dsi(cs, eaddr, DSISR_PROTFAULT);
+ ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_PROTFAULT);
break;
case MMU_DATA_STORE:
- ppc_hash64_set_dsi(cs, eaddr,
+ ppc_hash64_set_dsi(cs, mmu_idx, eaddr,
DSISR_PROTFAULT | DSISR_ISSTORE);
break;
default:
g_assert_not_reached();
}
- return 1;
+ return false;
}
raddr |= env->spr[SPR_RMOR];
}
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
- TARGET_PAGE_SIZE);
- return 0;
+
+ *raddrp = raddr;
+ *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ *psizep = TARGET_PAGE_BITS;
+ return true;
}
/* 2. Translation is on, so look up the SLB */
@@ -968,6 +970,9 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
exit(1);
}
/* Segment still not found, generate the appropriate interrupt */
+ if (!guest_visible) {
+ return false;
+ }
switch (access_type) {
case MMU_INST_FETCH:
cs->exception_index = POWERPC_EXCP_ISEG;
@@ -982,34 +987,39 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
default:
g_assert_not_reached();
}
- return 1;
+ return false;
}
-skip_slb_search:
+ skip_slb_search:
/* 3. Check for segment level no-execute violation */
if (access_type == MMU_INST_FETCH && (slb->vsid & SLB_VSID_N)) {
- ppc_hash64_set_isi(cs, SRR1_NOEXEC_GUARD);
- return 1;
+ if (guest_visible) {
+ ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOEXEC_GUARD);
+ }
+ return false;
}
/* 4. Locate the PTE in the hash table */
ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
if (ptex == -1) {
+ if (!guest_visible) {
+ return false;
+ }
switch (access_type) {
case MMU_INST_FETCH:
- ppc_hash64_set_isi(cs, SRR1_NOPTE);
+ ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOPTE);
break;
case MMU_DATA_LOAD:
- ppc_hash64_set_dsi(cs, eaddr, DSISR_NOPTE);
+ ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE);
break;
case MMU_DATA_STORE:
- ppc_hash64_set_dsi(cs, eaddr, DSISR_NOPTE | DSISR_ISSTORE);
+ ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE | DSISR_ISSTORE);
break;
default:
g_assert_not_reached();
}
- return 1;
+ return false;
}
qemu_log_mask(CPU_LOG_MMU,
"found PTE at index %08" HWADDR_PRIx "\n", ptex);
@@ -1017,7 +1027,7 @@ skip_slb_search:
/* 5. Check access permissions */
exec_prot = ppc_hash64_pte_noexec_guard(cpu, pte);
- pp_prot = ppc_hash64_pte_prot(cpu, slb, pte);
+ pp_prot = ppc_hash64_pte_prot(mmu_idx, slb, pte);
amr_prot = ppc_hash64_amr_prot(cpu, pte);
prot = exec_prot & pp_prot & amr_prot;
@@ -1025,6 +1035,9 @@ skip_slb_search:
if (need_prot & ~prot) {
/* Access right violation */
qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
+ if (!guest_visible) {
+ return false;
+ }
if (access_type == MMU_INST_FETCH) {
int srr1 = 0;
if (PAGE_EXEC & ~exec_prot) {
@@ -1035,7 +1048,7 @@ skip_slb_search:
if (PAGE_EXEC & ~amr_prot) {
srr1 |= SRR1_IAMR; /* Access violates virt pg class key prot */
}
- ppc_hash64_set_isi(cs, srr1);
+ ppc_hash64_set_isi(cs, mmu_idx, srr1);
} else {
int dsisr = 0;
if (need_prot & ~pp_prot) {
@@ -1047,9 +1060,9 @@ skip_slb_search:
if (need_prot & ~amr_prot) {
dsisr |= DSISR_AMR;
}
- ppc_hash64_set_dsi(cs, eaddr, dsisr);
+ ppc_hash64_set_dsi(cs, mmu_idx, eaddr, dsisr);
}
- return 1;
+ return false;
}
qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
@@ -1073,66 +1086,10 @@ skip_slb_search:
/* 7. Determine the real address from the PTE */
- raddr = deposit64(pte.pte1 & HPTE64_R_RPN, 0, apshift, eaddr);
-
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- prot, mmu_idx, 1ULL << apshift);
-
- return 0;
-}
-
-hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
-{
- CPUPPCState *env = &cpu->env;
- ppc_slb_t vrma_slbe;
- ppc_slb_t *slb;
- hwaddr ptex, raddr;
- ppc_hash_pte64_t pte;
- unsigned apshift;
-
- /* Handle real mode */
- if (msr_dr == 0) {
- /* In real mode the top 4 effective address bits are ignored */
- raddr = addr & 0x0FFFFFFFFFFFFFFFULL;
-
- if (cpu->vhyp) {
- /*
- * In virtual hypervisor mode, there's nothing to do:
- * EA == GPA == qemu guest address
- */
- return raddr;
- } else if ((msr_hv || !env->has_hv_mode) && !(addr >> 63)) {
- /* In HV mode, add HRMOR if top EA bit is clear */
- return raddr | env->spr[SPR_HRMOR];
- } else if (ppc_hash64_use_vrma(env)) {
- /* Emulated VRMA mode */
- slb = &vrma_slbe;
- if (build_vrma_slbe(cpu, slb) != 0) {
- return -1;
- }
- } else {
- target_ulong limit = rmls_limit(cpu);
-
- /* Emulated old-style RMO mode, bounds check against RMLS */
- if (raddr >= limit) {
- return -1;
- }
- return raddr | env->spr[SPR_RMOR];
- }
- } else {
- slb = slb_lookup(cpu, addr);
- if (!slb) {
- return -1;
- }
- }
-
- ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
- if (ptex == -1) {
- return -1;
- }
-
- return deposit64(pte.pte1 & HPTE64_R_RPN, 0, apshift, addr)
- & TARGET_PAGE_MASK;
+ *raddrp = deposit64(pte.pte1 & HPTE64_R_RPN, 0, apshift, eaddr);
+ *protp = prot;
+ *psizep = apshift;
+ return true;
}
void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 4b8b8e7..c5b2f97 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -7,9 +7,9 @@
void dump_slb(PowerPCCPU *cpu);
int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
target_ulong esid, target_ulong vsid);
-hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
-int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
- int mmu_idx);
+bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible);
void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
target_ulong pte_index,
target_ulong pte0, target_ulong pte1);
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index b6d191c..5b0e62e 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -155,7 +155,7 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type,
static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
uint64_t pte, int *fault_cause, int *prot,
- bool partition_scoped)
+ int mmu_idx, bool partition_scoped)
{
CPUPPCState *env = &cpu->env;
int need_prot;
@@ -173,7 +173,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
/* Determine permissions allowed by Encoded Access Authority */
if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) {
*prot = 0;
- } else if (msr_pr || (pte & R_PTE_EAA_PRIV) || partition_scoped) {
+ } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) ||
+ partition_scoped) {
*prot = ppc_radix64_get_prot_eaa(pte);
} else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
*prot = ppc_radix64_get_prot_eaa(pte);
@@ -299,7 +300,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
ppc_v3_pate_t pate,
hwaddr *h_raddr, int *h_prot,
int *h_page_size, bool pde_addr,
- bool guest_visible)
+ int mmu_idx, bool guest_visible)
{
int fault_cause = 0;
hwaddr pte_addr;
@@ -310,7 +311,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
&pte, &fault_cause, &pte_addr) ||
- ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) {
+ ppc_radix64_check_prot(cpu, access_type, pte,
+ &fault_cause, h_prot, mmu_idx, true)) {
if (pde_addr) { /* address being translated was that of a guest pde */
fault_cause |= DSISR_PRTABLE_FAULT;
}
@@ -332,7 +334,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
vaddr eaddr, uint64_t pid,
ppc_v3_pate_t pate, hwaddr *g_raddr,
int *g_prot, int *g_page_size,
- bool guest_visible)
+ int mmu_idx, bool guest_visible)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
@@ -367,7 +369,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
pate, &h_raddr, &h_prot,
&h_page_size, true,
- guest_visible);
+ /* mmu_idx is 5 because we're translating from hypervisor scope */
+ 5, guest_visible);
if (ret) {
return ret;
}
@@ -407,7 +410,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
pate, &h_raddr, &h_prot,
&h_page_size, true,
- guest_visible);
+ /* mmu_idx is 5 because we're translating from hypervisor scope */
+ 5, guest_visible);
if (ret) {
return ret;
}
@@ -431,7 +435,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
*g_raddr = (rpn & ~mask) | (eaddr & mask);
}
- if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) {
+ if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause,
+ g_prot, mmu_idx, false)) {
/* Access denied due to protection */
if (guest_visible) {
ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
@@ -463,24 +468,53 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
* | = On | Process Scoped | Scoped |
* +-------------+----------------+---------------+
*/
-static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
- MMUAccessType access_type,
- bool relocation,
- hwaddr *raddr, int *psizep, int *protp,
- bool guest_visible)
+bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddr, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible)
{
CPUPPCState *env = &cpu->env;
uint64_t lpid, pid;
ppc_v3_pate_t pate;
int psize, prot;
hwaddr g_raddr;
+ bool relocation;
+
+ assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp));
+
+ relocation = !mmuidx_real(mmu_idx);
+
+ /* HV or virtual hypervisor Real Mode Access */
+ if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) {
+ /* In real mode top 4 effective addr bits (mostly) ignored */
+ *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
+
+ /* In HV mode, add HRMOR if top EA bit is clear */
+ if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
+ if (!(eaddr >> 63)) {
+ *raddr |= env->spr[SPR_HRMOR];
+ }
+ }
+ *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ *psizep = TARGET_PAGE_BITS;
+ return true;
+ }
+
+ /*
+ * Check UPRT (we avoid the check in real mode to deal with
+ * transitional states during kexec.
+ */
+ if (guest_visible && !ppc64_use_proc_tbl(cpu)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "LPCR:UPRT not set in radix mode ! LPCR="
+ TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
+ }
/* Virtual Mode Access - get the fully qualified address */
if (!ppc_radix64_get_fully_qualified_addr(&cpu->env, eaddr, &lpid, &pid)) {
if (guest_visible) {
ppc_radix64_raise_segi(cpu, access_type, eaddr);
}
- return 1;
+ return false;
}
/* Get Process Table */
@@ -493,13 +527,13 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
if (guest_visible) {
ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
}
- return 1;
+ return false;
}
if (!validate_pate(cpu, lpid, &pate)) {
if (guest_visible) {
ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
}
- return 1;
+ return false;
}
}
@@ -517,9 +551,9 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
if (relocation) {
int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
pate, &g_raddr, &prot,
- &psize, guest_visible);
+ &psize, mmu_idx, guest_visible);
if (ret) {
- return ret;
+ return false;
}
*psizep = MIN(*psizep, psize);
*protp &= prot;
@@ -535,15 +569,15 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
* quadrants 1 or 2. Translates a guest real address to a host
* real address.
*/
- if (lpid || !msr_hv) {
+ if (lpid || !mmuidx_hv(mmu_idx)) {
int ret;
ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
g_raddr, pate, raddr,
&prot, &psize, false,
- guest_visible);
+ mmu_idx, guest_visible);
if (ret) {
- return ret;
+ return false;
}
*psizep = MIN(*psizep, psize);
*protp &= prot;
@@ -552,78 +586,5 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
}
}
- return 0;
-}
-
-int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
- int mmu_idx)
-{
- CPUState *cs = CPU(cpu);
- CPUPPCState *env = &cpu->env;
- int page_size, prot;
- bool relocation;
- MMUAccessType access_type;
- hwaddr raddr;
-
- assert(!(msr_hv && cpu->vhyp));
- assert((rwx == 0) || (rwx == 1) || (rwx == 2));
- access_type = rwx;
-
- relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr);
- /* HV or virtual hypervisor Real Mode Access */
- if (!relocation && (msr_hv || cpu->vhyp)) {
- /* In real mode top 4 effective addr bits (mostly) ignored */
- raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
-
- /* In HV mode, add HRMOR if top EA bit is clear */
- if (msr_hv || !env->has_hv_mode) {
- if (!(eaddr >> 63)) {
- raddr |= env->spr[SPR_HRMOR];
- }
- }
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
- TARGET_PAGE_SIZE);
- return 0;
- }
-
- /*
- * Check UPRT (we avoid the check in real mode to deal with
- * transitional states during kexec.
- */
- if (!ppc64_use_proc_tbl(cpu)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "LPCR:UPRT not set in radix mode ! LPCR="
- TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
- }
-
- /* Translate eaddr to raddr (where raddr is addr qemu needs for access) */
- if (ppc_radix64_xlate(cpu, eaddr, access_type, relocation, &raddr,
- &page_size, &prot, true)) {
- return 1;
- }
-
- tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
- prot, mmu_idx, 1UL << page_size);
- return 0;
-}
-
-hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr)
-{
- CPUPPCState *env = &cpu->env;
- int psize, prot;
- hwaddr raddr;
-
- /* Handle Real Mode */
- if ((msr_dr == 0) && (msr_hv || cpu->vhyp)) {
- /* In real mode top 4 effective addr bits (mostly) ignored */
- return eaddr & 0x0FFFFFFFFFFFFFFFULL;
- }
-
- if (ppc_radix64_xlate(cpu, eaddr, 0, msr_dr, &raddr, &psize,
- &prot, false)) {
- return -1;
- }
-
- return raddr & TARGET_PAGE_MASK;
+ return true;
}
diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
index f28c579..b70357c 100644
--- a/target/ppc/mmu-radix64.h
+++ b/target/ppc/mmu-radix64.h
@@ -44,9 +44,9 @@
#ifdef TARGET_PPC64
-int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
- int mmu_idx);
-hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
+bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddr, int *psizep, int *protp, int mmu_idx,
+ bool guest_visible);
static inline int ppc_radix64_get_prot_eaa(uint64_t pte)
{
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 1ecb36e..869d24d 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -511,7 +511,7 @@ static int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx
"\n", ppc_hash32_hpt_base(cpu),
- ppc_hash32_hpt_mask(env) + 0x80);
+ ppc_hash32_hpt_mask(cpu) + 0x80);
for (curaddr = ppc_hash32_hpt_base(cpu);
curaddr < (ppc_hash32_hpt_base(cpu)
+ ppc_hash32_hpt_mask(cpu) + 0x80);
@@ -825,6 +825,7 @@ static int mmubooke_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
return ret;
}
+#ifdef CONFIG_TCG
static void booke206_flush_tlb(CPUPPCState *env, int flags,
const int check_iprot)
{
@@ -846,6 +847,7 @@ static void booke206_flush_tlb(CPUPPCState *env, int flags,
tlb_flush(env_cpu(env));
}
+#endif
static hwaddr booke206_tlb_to_page_size(CPUPPCState *env,
ppcmas_tlb_t *tlb)
@@ -1435,48 +1437,6 @@ static int get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
}
#endif
-hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
-{
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- CPUPPCState *env = &cpu->env;
- mmu_ctx_t ctx;
-
- switch (env->mmu_model) {
-#if defined(TARGET_PPC64)
- case POWERPC_MMU_64B:
- case POWERPC_MMU_2_03:
- case POWERPC_MMU_2_06:
- case POWERPC_MMU_2_07:
- return ppc_hash64_get_phys_page_debug(cpu, addr);
- case POWERPC_MMU_3_00:
- return ppc64_v3_get_phys_page_debug(cpu, addr);
-#endif
-
- case POWERPC_MMU_32B:
- case POWERPC_MMU_601:
- return ppc_hash32_get_phys_page_debug(cpu, addr);
-
- default:
- ;
- }
-
- if (unlikely(get_physical_address(env, &ctx, addr, MMU_DATA_LOAD,
- ACCESS_INT) != 0)) {
-
- /*
- * Some MMUs have separate TLBs for code and data. If we only
- * try an ACCESS_INT, we may not be able to read instructions
- * mapped by code TLBs, so we also try a ACCESS_CODE.
- */
- if (unlikely(get_physical_address(env, &ctx, addr, MMU_INST_FETCH,
- ACCESS_CODE) != 0)) {
- return -1;
- }
- }
-
- return ctx.raddr & TARGET_PAGE_MASK;
-}
-
static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong address,
MMUAccessType access_type, int mmu_idx)
{
@@ -1532,30 +1492,38 @@ static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong address,
}
/* Perform address translation */
-static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
- MMUAccessType access_type, int mmu_idx)
+/* TODO: Split this by mmu_model. */
+static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
+ MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp,
+ int mmu_idx, bool guest_visible)
{
- CPUState *cs = env_cpu(env);
- PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
mmu_ctx_t ctx;
int type;
- int ret = 0;
+ int ret;
if (access_type == MMU_INST_FETCH) {
/* code access */
type = ACCESS_CODE;
- } else {
+ } else if (guest_visible) {
/* data access */
type = env->access_type;
+ } else {
+ type = ACCESS_INT;
}
- ret = get_physical_address_wtlb(env, &ctx, address, access_type,
+
+ ret = get_physical_address_wtlb(env, &ctx, eaddr, access_type,
type, mmu_idx);
if (ret == 0) {
- tlb_set_page(cs, address & TARGET_PAGE_MASK,
- ctx.raddr & TARGET_PAGE_MASK, ctx.prot,
- mmu_idx, TARGET_PAGE_SIZE);
- ret = 0;
- } else if (ret < 0) {
+ *raddrp = ctx.raddr;
+ *protp = ctx.prot;
+ *psizep = TARGET_PAGE_BITS;
+ return true;
+ }
+
+ if (guest_visible) {
LOG_MMU_STATE(cs);
if (type == ACCESS_CODE) {
switch (ret) {
@@ -1565,7 +1533,7 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
case POWERPC_MMU_SOFT_6xx:
cs->exception_index = POWERPC_EXCP_IFTLB;
env->error_code = 1 << 18;
- env->spr[SPR_IMISS] = address;
+ env->spr[SPR_IMISS] = eaddr;
env->spr[SPR_ICMP] = 0x80000000 | ctx.ptem;
goto tlb_miss;
case POWERPC_MMU_SOFT_74xx:
@@ -1575,29 +1543,25 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
case POWERPC_MMU_SOFT_4xx_Z:
cs->exception_index = POWERPC_EXCP_ITLB;
env->error_code = 0;
- env->spr[SPR_40x_DEAR] = address;
+ env->spr[SPR_40x_DEAR] = eaddr;
env->spr[SPR_40x_ESR] = 0x00000000;
break;
case POWERPC_MMU_BOOKE206:
- booke206_update_mas_tlb_miss(env, address, 2, mmu_idx);
+ booke206_update_mas_tlb_miss(env, eaddr, 2, mmu_idx);
/* fall through */
case POWERPC_MMU_BOOKE:
cs->exception_index = POWERPC_EXCP_ITLB;
env->error_code = 0;
- env->spr[SPR_BOOKE_DEAR] = address;
+ env->spr[SPR_BOOKE_DEAR] = eaddr;
env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, MMU_DATA_LOAD);
- return -1;
+ break;
case POWERPC_MMU_MPC8xx:
- /* XXX: TODO */
cpu_abort(cs, "MPC8xx MMU model is not implemented\n");
- break;
case POWERPC_MMU_REAL:
cpu_abort(cs, "PowerPC in real mode should never raise "
"any MMU exceptions\n");
- return -1;
default:
cpu_abort(cs, "Unknown or invalid MMU model\n");
- return -1;
}
break;
case -2:
@@ -1634,7 +1598,7 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
cs->exception_index = POWERPC_EXCP_DLTLB;
env->error_code = 0;
}
- env->spr[SPR_DMISS] = address;
+ env->spr[SPR_DMISS] = eaddr;
env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem;
tlb_miss:
env->error_code |= ctx.key << 19;
@@ -1652,7 +1616,7 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
tlb_miss_74xx:
/* Implement LRU algorithm */
env->error_code = ctx.key << 19;
- env->spr[SPR_TLBMISS] = (address & ~((target_ulong)0x3)) |
+ env->spr[SPR_TLBMISS] = (eaddr & ~((target_ulong)0x3)) |
((env->last_way + 1) & (env->nb_ways - 1));
env->spr[SPR_PTEHI] = 0x80000000 | ctx.ptem;
break;
@@ -1660,7 +1624,7 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
case POWERPC_MMU_SOFT_4xx_Z:
cs->exception_index = POWERPC_EXCP_DTLB;
env->error_code = 0;
- env->spr[SPR_40x_DEAR] = address;
+ env->spr[SPR_40x_DEAR] = eaddr;
if (access_type == MMU_DATA_STORE) {
env->spr[SPR_40x_ESR] = 0x00800000;
} else {
@@ -1670,23 +1634,20 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
case POWERPC_MMU_MPC8xx:
/* XXX: TODO */
cpu_abort(cs, "MPC8xx MMU model is not implemented\n");
- break;
case POWERPC_MMU_BOOKE206:
- booke206_update_mas_tlb_miss(env, address, access_type, mmu_idx);
+ booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
/* fall through */
case POWERPC_MMU_BOOKE:
cs->exception_index = POWERPC_EXCP_DTLB;
env->error_code = 0;
- env->spr[SPR_BOOKE_DEAR] = address;
+ env->spr[SPR_BOOKE_DEAR] = eaddr;
env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
- return -1;
+ break;
case POWERPC_MMU_REAL:
cpu_abort(cs, "PowerPC in real mode should never raise "
"any MMU exceptions\n");
- return -1;
default:
cpu_abort(cs, "Unknown or invalid MMU model\n");
- return -1;
}
break;
case -2:
@@ -1695,16 +1656,16 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
env->error_code = 0;
if (env->mmu_model == POWERPC_MMU_SOFT_4xx
|| env->mmu_model == POWERPC_MMU_SOFT_4xx_Z) {
- env->spr[SPR_40x_DEAR] = address;
+ env->spr[SPR_40x_DEAR] = eaddr;
if (access_type == MMU_DATA_STORE) {
env->spr[SPR_40x_ESR] |= 0x00800000;
}
} else if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
(env->mmu_model == POWERPC_MMU_BOOKE206)) {
- env->spr[SPR_BOOKE_DEAR] = address;
+ env->spr[SPR_BOOKE_DEAR] = eaddr;
env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
} else {
- env->spr[SPR_DAR] = address;
+ env->spr[SPR_DAR] = eaddr;
if (access_type == MMU_DATA_STORE) {
env->spr[SPR_DSISR] = 0x0A000000;
} else {
@@ -1719,13 +1680,13 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
/* Floating point load/store */
cs->exception_index = POWERPC_EXCP_ALIGN;
env->error_code = POWERPC_EXCP_ALIGN_FP;
- env->spr[SPR_DAR] = address;
+ env->spr[SPR_DAR] = eaddr;
break;
case ACCESS_RES:
/* lwarx, ldarx or stwcx. */
cs->exception_index = POWERPC_EXCP_DSI;
env->error_code = 0;
- env->spr[SPR_DAR] = address;
+ env->spr[SPR_DAR] = eaddr;
if (access_type == MMU_DATA_STORE) {
env->spr[SPR_DSISR] = 0x06000000;
} else {
@@ -1736,7 +1697,7 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
/* eciwx or ecowx */
cs->exception_index = POWERPC_EXCP_DSI;
env->error_code = 0;
- env->spr[SPR_DAR] = address;
+ env->spr[SPR_DAR] = eaddr;
if (access_type == MMU_DATA_STORE) {
env->spr[SPR_DSISR] = 0x06100000;
} else {
@@ -1748,16 +1709,14 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
cs->exception_index = POWERPC_EXCP_PROGRAM;
env->error_code =
POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL;
- env->spr[SPR_DAR] = address;
+ env->spr[SPR_DAR] = eaddr;
break;
}
break;
}
}
- ret = 1;
}
-
- return ret;
+ return false;
}
#ifdef CONFIG_TCG
@@ -1798,9 +1757,6 @@ static inline void dump_store_bat(CPUPPCState *env, char ID, int ul, int nr,
void helper_store_ibatu(CPUPPCState *env, uint32_t nr, target_ulong value)
{
target_ulong mask;
-#if defined(FLUSH_ALL_TLBS)
- PowerPCCPU *cpu = env_archcpu(env);
-#endif
dump_store_bat(env, 'I', 0, nr, value);
if (env->IBAT[0][nr] != value) {
@@ -1834,9 +1790,6 @@ void helper_store_ibatl(CPUPPCState *env, uint32_t nr, target_ulong value)
void helper_store_dbatu(CPUPPCState *env, uint32_t nr, target_ulong value)
{
target_ulong mask;
-#if defined(FLUSH_ALL_TLBS)
- PowerPCCPU *cpu = env_archcpu(env);
-#endif
dump_store_bat(env, 'D', 0, nr, value);
if (env->DBAT[0][nr] != value) {
@@ -1871,7 +1824,6 @@ void helper_store_601_batu(CPUPPCState *env, uint32_t nr, target_ulong value)
{
target_ulong mask;
#if defined(FLUSH_ALL_TLBS)
- PowerPCCPU *cpu = env_archcpu(env);
int do_inval;
#endif
@@ -1916,7 +1868,6 @@ void helper_store_601_batl(CPUPPCState *env, uint32_t nr, target_ulong value)
#if !defined(FLUSH_ALL_TLBS)
target_ulong mask;
#else
- PowerPCCPU *cpu = env_archcpu(env);
int do_inval;
#endif
@@ -1952,6 +1903,7 @@ void helper_store_601_batl(CPUPPCState *env, uint32_t nr, target_ulong value)
}
#endif
+#ifdef CONFIG_TCG
/*****************************************************************************/
/* TLB management */
void ppc_tlb_invalidate_all(CPUPPCState *env)
@@ -1995,6 +1947,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
break;
}
}
+#endif
#ifdef CONFIG_TCG
void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
@@ -2942,26 +2895,76 @@ void helper_check_tlb_flush_global(CPUPPCState *env)
/*****************************************************************************/
-bool ppc_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
+static bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
+ hwaddr *raddrp, int *psizep, int *protp,
+ int mmu_idx, bool guest_visible)
+{
+ switch (cpu->env.mmu_model) {
+#if defined(TARGET_PPC64)
+ case POWERPC_MMU_3_00:
+ if (ppc64_v3_radix(cpu)) {
+ return ppc_radix64_xlate(cpu, eaddr, access_type,
+ raddrp, psizep, protp, mmu_idx, guest_visible);
+ }
+ /* fall through */
+ case POWERPC_MMU_64B:
+ case POWERPC_MMU_2_03:
+ case POWERPC_MMU_2_06:
+ case POWERPC_MMU_2_07:
+ return ppc_hash64_xlate(cpu, eaddr, access_type,
+ raddrp, psizep, protp, mmu_idx, guest_visible);
+#endif
+
+ case POWERPC_MMU_32B:
+ case POWERPC_MMU_601:
+ return ppc_hash32_xlate(cpu, eaddr, access_type,
+ raddrp, psizep, protp, mmu_idx, guest_visible);
+
+ default:
+ return ppc_jumbo_xlate(cpu, eaddr, access_type, raddrp,
+ psizep, protp, mmu_idx, guest_visible);
+ }
+}
+
+hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ hwaddr raddr;
+ int s, p;
+
+ /*
+ * Some MMUs have separate TLBs for code and data. If we only
+ * try an MMU_DATA_LOAD, we may not be able to read instructions
+ * mapped by code TLBs, so we also try a MMU_INST_FETCH.
+ */
+ if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p,
+ cpu_mmu_index(&cpu->env, false), false) ||
+ ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p,
+ cpu_mmu_index(&cpu->env, true), false)) {
+ return raddr & TARGET_PAGE_MASK;
+ }
+ return -1;
+}
+
+#ifdef CONFIG_TCG
+bool ppc_cpu_tlb_fill(CPUState *cs, vaddr eaddr, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
- CPUPPCState *env = &cpu->env;
- int ret;
+ hwaddr raddr;
+ int page_size, prot;
- if (pcc->handle_mmu_fault) {
- ret = pcc->handle_mmu_fault(cpu, addr, access_type, mmu_idx);
- } else {
- ret = cpu_ppc_handle_mmu_fault(env, addr, access_type, mmu_idx);
+ if (ppc_xlate(cpu, eaddr, access_type, &raddr,
+ &page_size, &prot, mmu_idx, !probe)) {
+ tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
+ prot, mmu_idx, 1UL << page_size);
+ return true;
}
- if (unlikely(ret != 0)) {
- if (probe) {
- return false;
- }
- raise_exception_err_ra(env, cs->exception_index, env->error_code,
- retaddr);
+ if (probe) {
+ return false;
}
- return true;
+ raise_exception_err_ra(&cpu->env, cs->exception_index,
+ cpu->env.error_code, retaddr);
}
+#endif
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index f65d1e8..d1f482b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4940,6 +4940,11 @@ static void gen_mtcrf(DisasContext *ctx)
#if defined(TARGET_PPC64)
static void gen_mtmsrd(DisasContext *ctx)
{
+ if (unlikely(!is_book3s_arch2x(ctx))) {
+ gen_invalid(ctx);
+ return;
+ }
+
CHK_SV;
#if !defined(CONFIG_USER_ONLY)
diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT
index b9dd9b3..cc12237 100644
--- a/tests/data/acpi/pc/DSDT
+++ b/tests/data/acpi/pc/DSDT
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat
index cba5a1d..2d0678e 100644
--- a/tests/data/acpi/pc/DSDT.acpihmat
+++ b/tests/data/acpi/pc/DSDT.acpihmat
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge
index a9b4d56..77778c3 100644
--- a/tests/data/acpi/pc/DSDT.bridge
+++ b/tests/data/acpi/pc/DSDT.bridge
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp
index 8d86155..af046b4 100644
--- a/tests/data/acpi/pc/DSDT.cphp
+++ b/tests/data/acpi/pc/DSDT.cphp
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm
index e00a447..b56b2e0 100644
--- a/tests/data/acpi/pc/DSDT.dimmpxm
+++ b/tests/data/acpi/pc/DSDT.dimmpxm
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge
index 5d8ba19..bb0593e 100644
--- a/tests/data/acpi/pc/DSDT.hpbridge
+++ b/tests/data/acpi/pc/DSDT.hpbridge
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs
index 01e53bd..2e618e4 100644
--- a/tests/data/acpi/pc/DSDT.ipmikcs
+++ b/tests/data/acpi/pc/DSDT.ipmikcs
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp
index b810379..c32d285 100644
--- a/tests/data/acpi/pc/DSDT.memhp
+++ b/tests/data/acpi/pc/DSDT.memhp
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/pc/DSDT.nohpet
index d4f0050..623f06a 100644
--- a/tests/data/acpi/pc/DSDT.nohpet
+++ b/tests/data/acpi/pc/DSDT.nohpet
Binary files differ
diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem
index 8632dfe..f0a3fa9 100644
--- a/tests/data/acpi/pc/DSDT.numamem
+++ b/tests/data/acpi/pc/DSDT.numamem
Binary files differ
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index d53f61d..4cf4d53 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -21,6 +21,7 @@ Testing:
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -48,6 +49,7 @@ Testing: -fda TEST_DIR/t.qcow2
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -85,6 +87,7 @@ Testing: -fdb TEST_DIR/t.qcow2
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -96,6 +99,7 @@ Testing: -fdb TEST_DIR/t.qcow2
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -137,6 +141,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -148,6 +153,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -190,6 +196,7 @@ Testing: -fdb
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -201,6 +208,7 @@ Testing: -fdb
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -228,6 +236,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -265,6 +274,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -276,6 +286,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -317,6 +328,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -328,6 +340,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -373,6 +386,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -410,6 +424,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
dev: floppy, id ""
unit = 1 (0x1)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -447,6 +462,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
dev: floppy, id ""
unit = 1 (0x1)
drive = "none1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -458,6 +474,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -509,6 +526,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 1 (0x1)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -520,6 +538,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -562,6 +581,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 1 (0x1)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -573,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -615,6 +636,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -626,6 +648,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -668,6 +691,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -679,6 +703,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
dev: floppy, id ""
unit = 1 (0x1)
drive = "floppy1"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -730,6 +755,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
dev: floppy, id ""
unit = 1 (0x1)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -741,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -783,6 +810,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
dev: floppy, id ""
unit = 1 (0x1)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -794,6 +822,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
dev: floppy, id ""
unit = 0 (0x0)
drive = "floppy0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -842,6 +871,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -909,6 +939,7 @@ Testing: -device floppy
dev: floppy, id ""
unit = 0 (0x0)
drive = ""
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -933,6 +964,7 @@ Testing: -device floppy,drive-type=120
dev: floppy, id ""
unit = 0 (0x0)
drive = ""
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -957,6 +989,7 @@ Testing: -device floppy,drive-type=144
dev: floppy, id ""
unit = 0 (0x0)
drive = ""
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -981,6 +1014,7 @@ Testing: -device floppy,drive-type=288
dev: floppy, id ""
unit = 0 (0x0)
drive = ""
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -1008,6 +1042,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -1045,6 +1080,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -1085,6 +1121,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
@@ -1122,6 +1159,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
dev: floppy, id ""
unit = 0 (0x0)
drive = "none0"
+ backend_defaults = "auto"
logical_block_size = 512 (512 B)
physical_block_size = 512 (512 B)
min_io_size = 0 (0 B)
diff --git a/tests/qtest/rtas-test.c b/tests/qtest/rtas-test.c
index 16751db..5f1194a 100644
--- a/tests/qtest/rtas-test.c
+++ b/tests/qtest/rtas-test.c
@@ -5,7 +5,7 @@
#include "libqos/libqos-spapr.h"
#include "libqos/rtas.h"
-static void test_rtas_get_time_of_day(void)
+static void run_test_rtas_get_time_of_day(const char *machine)
{
QOSState *qs;
struct tm tm;
@@ -13,7 +13,7 @@ static void test_rtas_get_time_of_day(void)
uint64_t ret;
time_t t1, t2;
- qs = qtest_spapr_boot("-machine pseries");
+ qs = qtest_spapr_boot(machine);
t1 = time(NULL);
ret = qrtas_get_time_of_day(qs->qts, &qs->alloc, &tm, &ns);
@@ -24,6 +24,16 @@ static void test_rtas_get_time_of_day(void)
qtest_shutdown(qs);
}
+static void test_rtas_get_time_of_day(void)
+{
+ run_test_rtas_get_time_of_day("-machine pseries");
+}
+
+static void test_rtas_get_time_of_day_vof(void)
+{
+ run_test_rtas_get_time_of_day("-machine pseries,x-vof=on");
+}
+
int main(int argc, char *argv[])
{
const char *arch = qtest_get_arch();
@@ -35,6 +45,7 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
qtest_add_func("rtas/get-time-of-day", test_rtas_get_time_of_day);
+ qtest_add_func("rtas/get-time-of-day-vof", test_rtas_get_time_of_day_vof);
return g_test_run();
}
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index 7f801a4..2a3ef58 100644
--- a/tests/unit/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
@@ -108,7 +108,7 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
{
QEMUBH *bh = g_new(QEMUBH, 1);
diff --git a/util/async.c b/util/async.c
index 5d9b7cc..9a41591 100644
--- a/util/async.c
+++ b/util/async.c
@@ -57,6 +57,7 @@ enum {
struct QEMUBH {
AioContext *ctx;
+ const char *name;
QEMUBHFunc *cb;
void *opaque;
QSLIST_ENTRY(QEMUBH) next;
@@ -107,7 +108,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
return bh;
}
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
+ void *opaque, const char *name)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -115,11 +117,13 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
.ctx = ctx,
.cb = cb,
.opaque = opaque,
+ .name = name,
};
aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
}
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+ const char *name)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -127,6 +131,7 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
.ctx = ctx,
.cb = cb,
.opaque = opaque,
+ .name = name,
};
return bh;
}
@@ -339,8 +344,20 @@ aio_ctx_finalize(GSource *source)
assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
- /* qemu_bh_delete() must have been called on BHs in this AioContext */
- assert(flags & BH_DELETED);
+ /*
+ * qemu_bh_delete() must have been called on BHs in this AioContext. In
+ * many cases memory leaks, hangs, or inconsistent state occur when a
+ * BH is leaked because something still expects it to run.
+ *
+ * If you hit this, fix the lifecycle of the BH so that
+ * qemu_bh_delete() and any associated cleanup is called before the
+ * AioContext is finalized.
+ */
+ if (unlikely(!(flags & BH_DELETED))) {
+ fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
+ __func__, bh->name);
+ abort();
+ }
g_free(bh);
}
diff --git a/util/main-loop.c b/util/main-loop.c
index 4ae5b23..06b18b1 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -544,9 +544,9 @@ void main_loop_wait(int nonblocking)
/* Functions to operate on the main QEMU AioContext. */
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
{
- return aio_bh_new(qemu_aio_context, cb, opaque);
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
}
/*
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 838e286..893d864 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -225,6 +225,8 @@ static void *mmap_activate(void *ptr, size_t size, int fd,
"crash.\n", file_name);
g_free(proc_link);
g_free(file_name);
+ warn_report("Using non DAX backing file with 'pmem=on' option"
+ " is deprecated");
}
/*
* If mmap failed with MAP_SHARED_VALIDATE | MAP_SYNC, we will try