aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS6
-rw-r--r--backends/vhost-user.c2
-rw-r--r--docs/nvdimm.txt31
-rw-r--r--docs/virtio-pmem.rst75
-rw-r--r--hw/block/vhost-user-blk.c2
-rw-r--r--hw/core/qdev.c17
-rw-r--r--hw/i386/intel_iommu.c41
-rw-r--r--hw/i386/pc.c21
-rw-r--r--hw/virtio/virtio-mmio.c342
-rw-r--r--include/hw/boards.h9
-rw-r--r--include/hw/qdev-core.h1
-rw-r--r--qdev-monitor.c7
12 files changed, 521 insertions, 33 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f0e30b5..1c56d45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1554,7 +1554,8 @@ F: include/hw/virtio/virtio-input.h
F: contrib/vhost-user-input/*
virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
@@ -1563,7 +1564,8 @@ F: tests/virtio-console-test.c
F: tests/virtio-serial-test.c
virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 0a13506..2bf3406 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -46,7 +46,7 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
b->vdev = vdev;
b->dev.nvqs = nvqs;
- b->dev.vqs = g_new(struct vhost_virtqueue, nvqs);
+ b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs);
ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
if (ret < 0) {
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index b531cac..362e991 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -171,6 +171,35 @@ guest software that this vNVDIMM device contains a region that cannot
accept persistent writes. In result, for example, the guest Linux
NVDIMM driver, marks such vNVDIMM device as read-only.
+Backend File Setup Example
+--------------------------
+
+Here are two examples showing how to setup these persistent backends on
+linux using the tool ndctl [3].
+
+A. DAX device
+
+Use the following command to set up /dev/dax0.0 so that the entirety of
+namespace0.0 can be exposed as an emulated NVDIMM to the guest:
+
+ ndctl create-namespace -f -e namespace0.0 -m devdax
+
+The /dev/dax0.0 could be used directly in "mem-path" option.
+
+B. DAX file
+
+Individual files on a DAX host file system can be exposed as emulated
+NVDIMMS. First an fsdax block device is created, partitioned, and then
+mounted with the "dax" mount option:
+
+ ndctl create-namespace -f -e namespace0.0 -m fsdax
+ (partition /dev/pmem0 with name pmem0p1)
+ mount -o dax /dev/pmem0p1 /mnt
+ (create or copy a disk image file with qemu-img(1), cp(1), or dd(1)
+ in /mnt)
+
+Then the new file in /mnt could be used in "mem-path" option.
+
NVDIMM Persistence
------------------
@@ -212,3 +241,5 @@ References
https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
http://pmem.io/pmdk/
+[3] ndctl-create-namespace - provision or reconfigure a namespace
+ http://pmem.io/ndctl/ndctl-create-namespace.html
diff --git a/docs/virtio-pmem.rst b/docs/virtio-pmem.rst
new file mode 100644
index 0000000..e77881b
--- /dev/null
+++ b/docs/virtio-pmem.rst
@@ -0,0 +1,75 @@
+
+========================
+QEMU virtio pmem
+========================
+
+ This document explains the setup and usage of the virtio pmem device
+ which is available since QEMU v4.1.0.
+
+ The virtio pmem device is a paravirtualized persistent memory device
+ on regular (i.e non-NVDIMM) storage.
+
+Usecase
+--------
+
+ Virtio pmem allows to bypass the guest page cache and directly use
+ host page cache. This reduces guest memory footprint as the host can
+ make efficient memory reclaim decisions under memory pressure.
+
+o How does virtio-pmem compare to the nvdimm emulation supported by QEMU?
+
+ NVDIMM emulation on regular (i.e. non-NVDIMM) host storage does not
+ persist the guest writes as there are no defined semantics in the device
+ specification. The virtio pmem device provides guest write persistence
+ on non-NVDIMM host storage.
+
+virtio pmem usage
+-----------------
+
+ A virtio pmem device backed by a memory-backend-file can be created on
+ the QEMU command line as in the following example:
+
+ -object memory-backend-file,id=mem1,share,mem-path=./virtio_pmem.img,size=4G
+ -device virtio-pmem-pci,memdev=mem1,id=nv1
+
+ where:
+ - "object memory-backend-file,id=mem1,share,mem-path=<image>, size=<image size>"
+ creates a backend file with the specified size.
+
+ - "device virtio-pmem-pci,id=nvdimm1,memdev=mem1" creates a virtio pmem
+ pci device whose storage is provided by above memory backend device.
+
+ Multiple virtio pmem devices can be created if multiple pairs of "-object"
+ and "-device" are provided.
+
+Hotplug
+-------
+
+Virtio pmem devices can be hotplugged via the QEMU monitor. First, the
+memory backing has to be added via 'object_add'; afterwards, the virtio
+pmem device can be added via 'device_add'.
+
+For example, the following commands add another 4GB virtio pmem device to
+the guest:
+
+ (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=virtio_pmem2.img,size=4G
+ (qemu) device_add virtio-pmem-pci,id=virtio_pmem2,memdev=mem2
+
+Guest Data Persistence
+----------------------
+
+ Guest data persistence on non-NVDIMM requires guest userspace applications
+ to perform fsync/msync. This is different from a real nvdimm backend where
+ no additional fsync/msync is required. This is to persist guest writes in
+ host backing file which otherwise remains in host page cache and there is
+ risk of losing the data in case of power failure.
+
+ With virtio pmem device, MAP_SYNC mmap flag is not supported. This provides
+ a hint to application to perform fsync for write persistence.
+
+Limitations
+------------
+- Real nvdimm device backend is not supported.
+- virtio pmem hotunplug is not supported.
+- ACPI NVDIMM features like regions/namespaces are not supported.
+- ndctl command is not supported.
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0b8c5df..63da9bb 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
}
s->inflight = g_new0(struct vhost_inflight, 1);
- s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
+ s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
s->watch = 0;
s->connected = false;
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 60d66c2..cbad6c1 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -237,6 +237,23 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
return NULL;
}
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp)
+{
+ MachineState *machine;
+ MachineClass *mc;
+ Object *m_obj = qdev_get_machine();
+
+ if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
+ machine = MACHINE(m_obj);
+ mc = MACHINE_GET_CLASS(machine);
+ if (mc->hotplug_allowed) {
+ return mc->hotplug_allowed(machine, dev, errp);
+ }
+ }
+
+ return true;
+}
+
HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev)
{
if (dev->parent_bus) {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 75ca6f9..f1de8fd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
#include "hw/i386/x86-iommu.h"
#include "hw/pci-host/q35.h"
#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
#include "hw/i386/apic_internal.h"
#include "kvm_i386.h"
#include "migration/vmstate.h"
@@ -64,6 +65,13 @@
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
+static void vtd_panic_require_caching_mode(void)
+{
+ error_report("We need to set caching-mode=on for intel-iommu to enable "
+ "device assignment with IOMMU protection.");
+ exit(1);
+}
+
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -2928,12 +2936,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
- if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
- error_report("We need to set caching-mode=on for intel-iommu to enable "
- "device assignment with IOMMU protection.");
- exit(1);
- }
-
/* Update per-address-space notifier flags */
vtd_as->notifier_flags = new;
@@ -3699,6 +3701,32 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return true;
}
+static int vtd_machine_done_notify_one(Object *child, void *unused)
+{
+ IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+ /*
+ * We hard-coded here because vfio-pci is the only special case
+ * here. Let's be more elegant in the future when we can, but so
+ * far there seems to be no better way.
+ */
+ if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
+ vtd_panic_require_caching_mode();
+ }
+
+ return 0;
+}
+
+static void vtd_machine_done_hook(Notifier *notifier, void *unused)
+{
+ object_child_foreach_recursive(object_get_root(),
+ vtd_machine_done_notify_one, NULL);
+}
+
+static Notifier vtd_machine_done_notify = {
+ .notify = vtd_machine_done_hook,
+};
+
static void vtd_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
@@ -3744,6 +3772,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
/* Pseudo address space under root PCI bus. */
pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+ qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
}
static void vtd_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e5b59ff..bcda50e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2756,6 +2756,26 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
}
}
+
+static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
+{
+ X86IOMMUState *iommu = x86_iommu_get_default();
+ IntelIOMMUState *intel_iommu;
+
+ if (iommu &&
+ object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
+ object_dynamic_cast((Object *)dev, "vfio-pci")) {
+ intel_iommu = INTEL_IOMMU_DEVICE(iommu);
+ if (!intel_iommu->caching_mode) {
+ error_setg(errp, "Device assignment is not allowed without "
+ "enabling caching-mode=on for Intel IOMMU.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -2780,6 +2800,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->pvh_enabled = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
+ mc->hotplug_allowed = pc_hotplug_allowed;
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index eccc795..3d5ca0f 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -50,14 +50,24 @@
OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO)
#define VIRT_MAGIC 0x74726976 /* 'virt' */
-#define VIRT_VERSION 1
+#define VIRT_VERSION 2
+#define VIRT_VERSION_LEGACY 1
#define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
+typedef struct VirtIOMMIOQueue {
+ uint16_t num;
+ bool enabled;
+ uint32_t desc[2];
+ uint32_t avail[2];
+ uint32_t used[2];
+} VirtIOMMIOQueue;
+
typedef struct {
/* Generic */
SysBusDevice parent_obj;
MemoryRegion iomem;
qemu_irq irq;
+ bool legacy;
/* Guest accessible state needing migration and reset */
uint32_t host_features_sel;
uint32_t guest_features_sel;
@@ -65,6 +75,9 @@ typedef struct {
/* virtio-bus */
VirtioBusState bus;
bool format_transport_address;
+ /* Fields only used for non-legacy (v2) devices */
+ uint32_t guest_features[2];
+ VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
} VirtIOMMIOProxy;
static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
@@ -118,7 +131,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
case VIRTIO_MMIO_MAGIC_VALUE:
return VIRT_MAGIC;
case VIRTIO_MMIO_VERSION:
- return VIRT_VERSION;
+ if (proxy->legacy) {
+ return VIRT_VERSION_LEGACY;
+ } else {
+ return VIRT_VERSION;
+ }
case VIRTIO_MMIO_VENDOR_ID:
return VIRT_VENDOR;
default:
@@ -149,28 +166,64 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
case VIRTIO_MMIO_MAGIC_VALUE:
return VIRT_MAGIC;
case VIRTIO_MMIO_VERSION:
- return VIRT_VERSION;
+ if (proxy->legacy) {
+ return VIRT_VERSION_LEGACY;
+ } else {
+ return VIRT_VERSION;
+ }
case VIRTIO_MMIO_DEVICE_ID:
return vdev->device_id;
case VIRTIO_MMIO_VENDOR_ID:
return VIRT_VENDOR;
case VIRTIO_MMIO_DEVICE_FEATURES:
- if (proxy->host_features_sel) {
- return 0;
+ if (proxy->legacy) {
+ if (proxy->host_features_sel) {
+ return 0;
+ } else {
+ return vdev->host_features;
+ }
+ } else {
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+ return (vdev->host_features & ~vdc->legacy_features)
+ >> (32 * proxy->host_features_sel);
}
- return vdev->host_features;
case VIRTIO_MMIO_QUEUE_NUM_MAX:
if (!virtio_queue_get_num(vdev, vdev->queue_sel)) {
return 0;
}
return VIRTQUEUE_MAX_SIZE;
case VIRTIO_MMIO_QUEUE_PFN:
+ if (!proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: read from legacy register (0x%"
+ HWADDR_PRIx ") in non-legacy mode\n",
+ __func__, offset);
+ return 0;
+ }
return virtio_queue_get_addr(vdev, vdev->queue_sel)
>> proxy->guest_page_shift;
+ case VIRTIO_MMIO_QUEUE_READY:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: read from non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return 0;
+ }
+ return proxy->vqs[vdev->queue_sel].enabled;
case VIRTIO_MMIO_INTERRUPT_STATUS:
return atomic_read(&vdev->isr);
case VIRTIO_MMIO_STATUS:
return vdev->status;
+ case VIRTIO_MMIO_CONFIG_GENERATION:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: read from non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return 0;
+ }
+ return vdev->generation;
case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
case VIRTIO_MMIO_DRIVER_FEATURES:
case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
@@ -180,12 +233,20 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
case VIRTIO_MMIO_QUEUE_ALIGN:
case VIRTIO_MMIO_QUEUE_NOTIFY:
case VIRTIO_MMIO_INTERRUPT_ACK:
+ case VIRTIO_MMIO_QUEUE_DESC_LOW:
+ case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+ case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+ case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+ case VIRTIO_MMIO_QUEUE_USED_LOW:
+ case VIRTIO_MMIO_QUEUE_USED_HIGH:
qemu_log_mask(LOG_GUEST_ERROR,
- "%s: read of write-only register\n",
- __func__);
+ "%s: read of write-only register (0x%" HWADDR_PRIx ")\n",
+ __func__, offset);
return 0;
default:
- qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+ __func__, offset);
return 0;
}
return 0;
@@ -232,17 +293,41 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
}
switch (offset) {
case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
- proxy->host_features_sel = value;
+ if (value) {
+ proxy->host_features_sel = 1;
+ } else {
+ proxy->host_features_sel = 0;
+ }
break;
case VIRTIO_MMIO_DRIVER_FEATURES:
- if (!proxy->guest_features_sel) {
- virtio_set_features(vdev, value);
+ if (proxy->legacy) {
+ if (proxy->guest_features_sel) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: attempt to write guest features with "
+ "guest_features_sel > 0 in legacy mode\n",
+ __func__);
+ } else {
+ virtio_set_features(vdev, value);
+ }
+ } else {
+ proxy->guest_features[proxy->guest_features_sel] = value;
}
break;
case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
- proxy->guest_features_sel = value;
+ if (value) {
+ proxy->guest_features_sel = 1;
+ } else {
+ proxy->guest_features_sel = 0;
+ }
break;
case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+ if (!proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to legacy register (0x%"
+ HWADDR_PRIx ") in non-legacy mode\n",
+ __func__, offset);
+ return;
+ }
proxy->guest_page_shift = ctz32(value);
if (proxy->guest_page_shift > 31) {
proxy->guest_page_shift = 0;
@@ -256,15 +341,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
break;
case VIRTIO_MMIO_QUEUE_NUM:
trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
- virtio_queue_set_num(vdev, vdev->queue_sel, value);
- /* Note: only call this function for legacy devices */
- virtio_queue_update_rings(vdev, vdev->queue_sel);
+ if (proxy->legacy) {
+ virtio_queue_set_num(vdev, vdev->queue_sel, value);
+ virtio_queue_update_rings(vdev, vdev->queue_sel);
+ } else {
+ proxy->vqs[vdev->queue_sel].num = value;
+ }
break;
case VIRTIO_MMIO_QUEUE_ALIGN:
- /* Note: this is only valid for legacy devices */
+ if (!proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to legacy register (0x%"
+ HWADDR_PRIx ") in non-legacy mode\n",
+ __func__, offset);
+ return;
+ }
virtio_queue_set_align(vdev, vdev->queue_sel, value);
break;
case VIRTIO_MMIO_QUEUE_PFN:
+ if (!proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to legacy register (0x%"
+ HWADDR_PRIx ") in non-legacy mode\n",
+ __func__, offset);
+ return;
+ }
if (value == 0) {
virtio_reset(vdev);
} else {
@@ -272,6 +373,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
value << proxy->guest_page_shift);
}
break;
+ case VIRTIO_MMIO_QUEUE_READY:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ if (value) {
+ virtio_queue_set_num(vdev, vdev->queue_sel,
+ proxy->vqs[vdev->queue_sel].num);
+ virtio_queue_set_rings(vdev, vdev->queue_sel,
+ ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+ proxy->vqs[vdev->queue_sel].desc[0],
+ ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+ proxy->vqs[vdev->queue_sel].avail[0],
+ ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+ proxy->vqs[vdev->queue_sel].used[0]);
+ proxy->vqs[vdev->queue_sel].enabled = 1;
+ } else {
+ proxy->vqs[vdev->queue_sel].enabled = 0;
+ }
+ break;
case VIRTIO_MMIO_QUEUE_NOTIFY:
if (value < VIRTIO_QUEUE_MAX) {
virtio_queue_notify(vdev, value);
@@ -286,6 +410,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
virtio_mmio_stop_ioeventfd(proxy);
}
+ if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) {
+ virtio_set_features(vdev,
+ ((uint64_t)proxy->guest_features[1]) << 32 |
+ proxy->guest_features[0]);
+ }
+
virtio_set_status(vdev, value & 0xff);
if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
@@ -296,6 +426,66 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
virtio_reset(vdev);
}
break;
+ case VIRTIO_MMIO_QUEUE_DESC_LOW:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].desc[0] = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].desc[1] = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].avail[0] = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].avail[1] = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_USED_LOW:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].used[0] = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_USED_HIGH:
+ if (proxy->legacy) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to non-legacy register (0x%"
+ HWADDR_PRIx ") in legacy mode\n",
+ __func__, offset);
+ return;
+ }
+ proxy->vqs[vdev->queue_sel].used[1] = value;
+ break;
case VIRTIO_MMIO_MAGIC_VALUE:
case VIRTIO_MMIO_VERSION:
case VIRTIO_MMIO_DEVICE_ID:
@@ -303,22 +493,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
case VIRTIO_MMIO_DEVICE_FEATURES:
case VIRTIO_MMIO_QUEUE_NUM_MAX:
case VIRTIO_MMIO_INTERRUPT_STATUS:
+ case VIRTIO_MMIO_CONFIG_GENERATION:
qemu_log_mask(LOG_GUEST_ERROR,
- "%s: write to readonly register\n",
- __func__);
+ "%s: write to read-only register (0x%" HWADDR_PRIx ")\n",
+ __func__, offset);
break;
default:
- qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+ __func__, offset);
}
}
-static const MemoryRegionOps virtio_mem_ops = {
+static const MemoryRegionOps virtio_legacy_mem_ops = {
.read = virtio_mmio_read,
.write = virtio_mmio_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
+static const MemoryRegionOps virtio_mem_ops = {
+ .read = virtio_mmio_read,
+ .write = virtio_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
{
VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
@@ -352,15 +551,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f)
qemu_put_be32(f, proxy->guest_page_shift);
}
+static const VMStateDescription vmstate_virtio_mmio_queue_state = {
+ .name = "virtio_mmio/queue_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT16(num, VirtIOMMIOQueue),
+ VMSTATE_BOOL(enabled, VirtIOMMIOQueue),
+ VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2),
+ VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2),
+ VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_virtio_mmio_state_sub = {
+ .name = "virtio_mmio/state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2),
+ VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0,
+ vmstate_virtio_mmio_queue_state,
+ VirtIOMMIOQueue),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_virtio_mmio = {
+ .name = "virtio_mmio",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_virtio_mmio_state_sub,
+ NULL
+ }
+};
+
+static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+ vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+}
+
+static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+ return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+}
+
+static bool virtio_mmio_has_extra_state(DeviceState *opaque)
+{
+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+ return !proxy->legacy;
+}
+
static void virtio_mmio_reset(DeviceState *d)
{
VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+ int i;
virtio_mmio_stop_ioeventfd(proxy);
virtio_bus_reset(&proxy->bus);
proxy->host_features_sel = 0;
proxy->guest_features_sel = 0;
proxy->guest_page_shift = 0;
+
+ if (!proxy->legacy) {
+ proxy->guest_features[0] = proxy->guest_features[1] = 0;
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ proxy->vqs[i].enabled = 0;
+ proxy->vqs[i].num = 0;
+ proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
+ proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
+ proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
+ }
+ }
}
static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
@@ -423,11 +697,22 @@ assign_error:
return r;
}
+static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp)
+{
+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+ if (!proxy->legacy) {
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
+ }
+}
+
/* virtio-mmio device */
static Property virtio_mmio_properties[] = {
DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
format_transport_address, true),
+ DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -439,8 +724,15 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS,
d, NULL);
sysbus_init_irq(sbd, &proxy->irq);
- memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_mem_ops, proxy,
- TYPE_VIRTIO_MMIO, 0x200);
+ if (proxy->legacy) {
+ memory_region_init_io(&proxy->iomem, OBJECT(d),
+ &virtio_legacy_mem_ops, proxy,
+ TYPE_VIRTIO_MMIO, 0x200);
+ } else {
+ memory_region_init_io(&proxy->iomem, OBJECT(d),
+ &virtio_mem_ops, proxy,
+ TYPE_VIRTIO_MMIO, 0x200);
+ }
sysbus_init_mmio(sbd, &proxy->iomem);
}
@@ -511,9 +803,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data)
k->notify = virtio_mmio_update_irq;
k->save_config = virtio_mmio_save_config;
k->load_config = virtio_mmio_load_config;
+ k->save_extra_state = virtio_mmio_save_extra_state;
+ k->load_extra_state = virtio_mmio_load_extra_state;
+ k->has_extra_state = virtio_mmio_has_extra_state;
k->set_guest_notifiers = virtio_mmio_set_guest_notifiers;
k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled;
k->ioeventfd_assign = virtio_mmio_ioeventfd_assign;
+ k->pre_plugged = virtio_mmio_pre_plugged;
k->has_variable_vring_alignment = true;
bus_class->max_dev = 1;
bus_class->get_dev_path = virtio_mmio_bus_get_dev_path;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2289536..be18a5c 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -166,6 +166,13 @@ typedef struct {
* The function pointer to hook different machine specific functions for
* parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
* machine specific topology fields, such as smp_dies for PCMachine.
+ * @hotplug_allowed:
+ * If the hook is provided, then it'll be called for each device
+ * hotplug to check whether the device hotplug is allowed. Return
+ * true to grant allowance or false to reject the hotplug. When
+ * false is returned, an error must be set to show the reason of
+ * the rejection. If the hook is not provided, all hotplug will be
+ * allowed.
*/
struct MachineClass {
/*< private >*/
@@ -224,6 +231,8 @@ struct MachineClass {
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
+ bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
+ Error **errp);
CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
unsigned cpu_index);
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index de70b7a..aa123f8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -280,6 +280,7 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version);
HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev);
HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
/**
* qdev_get_hotplug_handler: Get handler responsible for device wiring
*
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8fe5c2c..148df9c 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -615,6 +615,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
/* create device */
dev = DEVICE(object_new(driver));
+ /* Check whether the hotplug is allowed by the machine */
+ if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) {
+ /* Error must be set in the machine hook */
+ assert(err);
+ goto err_del_dev;
+ }
+
if (bus) {
qdev_set_parent_bus(dev, bus);
} else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {