12 files changed, 521 insertions, 33 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f0e30b5..1c56d45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1554,7 +1554,8 @@ F: include/hw/virtio/virtio-input.h
 F: contrib/vhost-user-input/*
 
 virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1563,7 +1564,8 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c
 
 virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 0a13506..2bf3406 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -46,7 +46,7 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev,
 
     b->vdev = vdev;
     b->dev.nvqs = nvqs;
-    b->dev.vqs = g_new(struct vhost_virtqueue, nvqs);
+    b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs);
 
     ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
     if (ret < 0) {
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index b531cac..362e991 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -171,6 +171,35 @@ guest software that this vNVDIMM device contains a region that cannot
 accept persistent writes. In result, for example, the guest Linux
 NVDIMM driver, marks such vNVDIMM device as read-only.
 
+Backend File Setup Example
+--------------------------
+
+Here are two examples showing how to setup these persistent backends on
+linux using the tool ndctl [3].
+
+A. DAX device
+
+Use the following command to set up /dev/dax0.0 so that the entirety of
+namespace0.0 can be exposed as an emulated NVDIMM to the guest:
+
+    ndctl create-namespace -f -e namespace0.0 -m devdax
+
+The /dev/dax0.0 could be used directly in "mem-path" option.
+
+B. DAX file
+
+Individual files on a DAX host file system can be exposed as emulated
+NVDIMMS.  First an fsdax block device is created, partitioned, and then
+mounted with the "dax" mount option:
+
+    ndctl create-namespace -f -e namespace0.0 -m fsdax
+    (partition /dev/pmem0 with name pmem0p1)
+    mount -o dax /dev/pmem0p1 /mnt
+    (create or copy a disk image file with qemu-img(1), cp(1), or dd(1)
+     in /mnt)
+
+Then the new file in /mnt could be used in "mem-path" option.
+
 NVDIMM Persistence
 ------------------
 
@@ -212,3 +241,5 @@ References
     https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
 [2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
     http://pmem.io/pmdk/
+[3] ndctl-create-namespace - provision or reconfigure a namespace
+    http://pmem.io/ndctl/ndctl-create-namespace.html
diff --git a/docs/virtio-pmem.rst b/docs/virtio-pmem.rst
new file mode 100644
index 0000000..e77881b
--- /dev/null
+++ b/docs/virtio-pmem.rst
@@ -0,0 +1,75 @@
+
+========================
+QEMU virtio pmem
+========================
+
+ This document explains the setup and usage of the virtio pmem device
+ which is available since QEMU v4.1.0.
+
+ The virtio pmem device is a paravirtualized persistent memory device
+ on regular (i.e non-NVDIMM) storage.
+
+Usecase
+--------
+
+  Virtio pmem allows to bypass the guest page cache and directly use
+  host page cache. This reduces guest memory footprint as the host can
+  make efficient memory reclaim decisions under memory pressure.
+
+o How does virtio-pmem compare to the nvdimm emulation supported by QEMU?
+
+  NVDIMM emulation on regular (i.e. non-NVDIMM) host storage does not
+  persist the guest writes as there are no defined semantics in the device
+  specification. The virtio pmem device provides guest write persistence
+  on non-NVDIMM host storage.
+
+virtio pmem usage
+-----------------
+
+  A virtio pmem device backed by a memory-backend-file can be created on
+  the QEMU command line as in the following example:
+
+  -object memory-backend-file,id=mem1,share,mem-path=./virtio_pmem.img,size=4G
+  -device virtio-pmem-pci,memdev=mem1,id=nv1
+
+   where:
+   - "object memory-backend-file,id=mem1,share,mem-path=<image>, size=<image size>"
+     creates a backend file with the specified size.
+
+   - "device virtio-pmem-pci,id=nvdimm1,memdev=mem1" creates a virtio pmem
+     pci device whose storage is provided by above memory backend device.
+
+  Multiple virtio pmem devices can be created if multiple pairs of "-object"
+  and "-device" are provided.
+
+Hotplug
+-------
+
+Virtio pmem devices can be hotplugged via the QEMU monitor. First, the
+memory backing has to be added via 'object_add'; afterwards, the virtio
+pmem device can be added via 'device_add'.
+
+For example, the following commands add another 4GB virtio pmem device to
+the guest:
+
+ (qemu) object_add memory-backend-file,id=mem2,share=on,mem-path=virtio_pmem2.img,size=4G
+ (qemu) device_add virtio-pmem-pci,id=virtio_pmem2,memdev=mem2
+
+Guest Data Persistence
+----------------------
+
+ Guest data persistence on non-NVDIMM requires guest userspace applications
+ to perform fsync/msync. This is different from a real nvdimm backend where
+ no additional fsync/msync is required. This is to persist guest writes in
+ host backing file which otherwise remains in host page cache and there is
+ risk of losing the data in case of power failure.
+
+ With virtio pmem device, MAP_SYNC mmap flag is not supported. This provides
+ a hint to application to perform fsync for write persistence.
+
+Limitations
+------------
+- Real nvdimm device backend is not supported.
+- virtio pmem hotunplug is not supported.
+- ACPI NVDIMM features like regions/namespaces are not supported.
+- ndctl command is not supported.
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0b8c5df..63da9bb 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -421,7 +421,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     s->inflight = g_new0(struct vhost_inflight, 1);
-    s->vqs = g_new(struct vhost_virtqueue, s->num_queues);
+    s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->watch = 0;
     s->connected = false;
 
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 60d66c2..cbad6c1 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -237,6 +237,23 @@ HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
     return NULL;
 }
 
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp)
+{
+    MachineState *machine;
+    MachineClass *mc;
+    Object *m_obj = qdev_get_machine();
+
+    if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
+        machine = MACHINE(m_obj);
+        mc = MACHINE_GET_CLASS(machine);
+        if (mc->hotplug_allowed) {
+            return mc->hotplug_allowed(machine, dev, errp);
+        }
+    }
+
+    return true;
+}
+
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev)
 {
     if (dev->parent_bus) {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 75ca6f9..f1de8fd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "hw/i386/x86-iommu.h"
 #include "hw/pci-host/q35.h"
 #include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm_i386.h"
 #include "migration/vmstate.h"
@@ -64,6 +65,13 @@
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
+static void vtd_panic_require_caching_mode(void)
+{
+    error_report("We need to set caching-mode=on for intel-iommu to enable "
+                 "device assignment with IOMMU protection.");
+    exit(1);
+}
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -2928,12 +2936,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
 
-    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
-        error_report("We need to set caching-mode=on for intel-iommu to enable "
-                     "device assignment with IOMMU protection.");
-        exit(1);
-    }
-
     /* Update per-address-space notifier flags */
     vtd_as->notifier_flags = new;
 
@@ -3699,6 +3701,32 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     return true;
 }
 
+static int vtd_machine_done_notify_one(Object *child, void *unused)
+{
+    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+    /*
+     * We hard-coded here because vfio-pci is the only special case
+     * here.  Let's be more elegant in the future when we can, but so
+     * far there seems to be no better way.
+     */
+    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
+        vtd_panic_require_caching_mode();
+    }
+
+    return 0;
+}
+
+static void vtd_machine_done_hook(Notifier *notifier, void *unused)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   vtd_machine_done_notify_one, NULL);
+}
+
+static Notifier vtd_machine_done_notify = {
+    .notify = vtd_machine_done_hook,
+};
+
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -3744,6 +3772,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
     /* Pseudo address space under root PCI bus. */
     pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
 }
 
 static void vtd_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e5b59ff..bcda50e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2756,6 +2756,26 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
+
+static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
+{
+    X86IOMMUState *iommu = x86_iommu_get_default();
+    IntelIOMMUState *intel_iommu;
+
+    if (iommu &&
+        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
+        object_dynamic_cast((Object *)dev, "vfio-pci")) {
+        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
+        if (!intel_iommu->caching_mode) {
+            error_setg(errp, "Device assignment is not allowed without "
+                       "enabling caching-mode=on for Intel IOMMU.");
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static void pc_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2780,6 +2800,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->pvh_enabled = true;
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = pc_get_hotplug_handler;
+    mc->hotplug_allowed = pc_hotplug_allowed;
     mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
     mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
     mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index eccc795..3d5ca0f 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -50,14 +50,24 @@
         OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO)
 
 #define VIRT_MAGIC 0x74726976 /* 'virt' */
-#define VIRT_VERSION 1
+#define VIRT_VERSION 2
+#define VIRT_VERSION_LEGACY 1
 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
 
+typedef struct VirtIOMMIOQueue {
+    uint16_t num;
+    bool enabled;
+    uint32_t desc[2];
+    uint32_t avail[2];
+    uint32_t used[2];
+} VirtIOMMIOQueue;
+
 typedef struct {
     /* Generic */
     SysBusDevice parent_obj;
     MemoryRegion iomem;
     qemu_irq irq;
+    bool legacy;
     /* Guest accessible state needing migration and reset */
     uint32_t host_features_sel;
     uint32_t guest_features_sel;
@@ -65,6 +75,9 @@ typedef struct {
     /* virtio-bus */
     VirtioBusState bus;
     bool format_transport_address;
+    /* Fields only used for non-legacy (v2) devices */
+    uint32_t guest_features[2];
+    VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
 } VirtIOMMIOProxy;
 
 static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
@@ -118,7 +131,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
         case VIRTIO_MMIO_MAGIC_VALUE:
             return VIRT_MAGIC;
         case VIRTIO_MMIO_VERSION:
-            return VIRT_VERSION;
+            if (proxy->legacy) {
+                return VIRT_VERSION_LEGACY;
+            } else {
+                return VIRT_VERSION;
+            }
         case VIRTIO_MMIO_VENDOR_ID:
             return VIRT_VENDOR;
         default:
@@ -149,28 +166,64 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_MAGIC_VALUE:
         return VIRT_MAGIC;
     case VIRTIO_MMIO_VERSION:
-        return VIRT_VERSION;
+        if (proxy->legacy) {
+            return VIRT_VERSION_LEGACY;
+        } else {
+            return VIRT_VERSION;
+        }
     case VIRTIO_MMIO_DEVICE_ID:
         return vdev->device_id;
     case VIRTIO_MMIO_VENDOR_ID:
         return VIRT_VENDOR;
     case VIRTIO_MMIO_DEVICE_FEATURES:
-        if (proxy->host_features_sel) {
-            return 0;
+        if (proxy->legacy) {
+            if (proxy->host_features_sel) {
+                return 0;
+            } else {
+                return vdev->host_features;
+            }
+        } else {
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+            return (vdev->host_features & ~vdc->legacy_features)
+                >> (32 * proxy->host_features_sel);
         }
-        return vdev->host_features;
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
         if (!virtio_queue_get_num(vdev, vdev->queue_sel)) {
             return 0;
         }
         return VIRTQUEUE_MAX_SIZE;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
         return virtio_queue_get_addr(vdev, vdev->queue_sel)
             >> proxy->guest_page_shift;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return proxy->vqs[vdev->queue_sel].enabled;
     case VIRTIO_MMIO_INTERRUPT_STATUS:
         return atomic_read(&vdev->isr);
     case VIRTIO_MMIO_STATUS:
         return vdev->status;
+    case VIRTIO_MMIO_CONFIG_GENERATION:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return vdev->generation;
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
     case VIRTIO_MMIO_DRIVER_FEATURES:
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
@@ -180,12 +233,20 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
     case VIRTIO_MMIO_QUEUE_ALIGN:
     case VIRTIO_MMIO_QUEUE_NOTIFY:
     case VIRTIO_MMIO_INTERRUPT_ACK:
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: read of write-only register\n",
-                      __func__);
+                      "%s: read of write-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         return 0;
     }
     return 0;
@@ -232,17 +293,41 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     }
     switch (offset) {
     case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
-        proxy->host_features_sel = value;
+        if (value) {
+            proxy->host_features_sel = 1;
+        } else {
+            proxy->host_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES:
-        if (!proxy->guest_features_sel) {
-            virtio_set_features(vdev, value);
+        if (proxy->legacy) {
+            if (proxy->guest_features_sel) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: attempt to write guest features with "
+                              "guest_features_sel > 0 in legacy mode\n",
+                              __func__);
+            } else {
+                virtio_set_features(vdev, value);
+            }
+        } else {
+            proxy->guest_features[proxy->guest_features_sel] = value;
         }
         break;
     case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
-        proxy->guest_features_sel = value;
+        if (value) {
+            proxy->guest_features_sel = 1;
+        } else {
+            proxy->guest_features_sel = 0;
+        }
         break;
     case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         proxy->guest_page_shift = ctz32(value);
         if (proxy->guest_page_shift > 31) {
             proxy->guest_page_shift = 0;
@@ -256,15 +341,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
         break;
     case VIRTIO_MMIO_QUEUE_NUM:
         trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
-        virtio_queue_set_num(vdev, vdev->queue_sel, value);
-        /* Note: only call this function for legacy devices */
-        virtio_queue_update_rings(vdev, vdev->queue_sel);
+        if (proxy->legacy) {
+            virtio_queue_set_num(vdev, vdev->queue_sel, value);
+            virtio_queue_update_rings(vdev, vdev->queue_sel);
+        } else {
+            proxy->vqs[vdev->queue_sel].num = value;
+        }
         break;
     case VIRTIO_MMIO_QUEUE_ALIGN:
-        /* Note: this is only valid for legacy devices */
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         virtio_queue_set_align(vdev, vdev->queue_sel, value);
         break;
     case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
         if (value == 0) {
             virtio_reset(vdev);
         } else {
@@ -272,6 +373,29 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
                                   value << proxy->guest_page_shift);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        if (value) {
+            virtio_queue_set_num(vdev, vdev->queue_sel,
+                                 proxy->vqs[vdev->queue_sel].num);
+            virtio_queue_set_rings(vdev, vdev->queue_sel,
+                ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].desc[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].avail[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].used[0]);
+            proxy->vqs[vdev->queue_sel].enabled = 1;
+        } else {
+            proxy->vqs[vdev->queue_sel].enabled = 0;
+        }
+        break;
     case VIRTIO_MMIO_QUEUE_NOTIFY:
         if (value < VIRTIO_QUEUE_MAX) {
             virtio_queue_notify(vdev, value);
@@ -286,6 +410,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_mmio_stop_ioeventfd(proxy);
         }
 
+        if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) {
+            virtio_set_features(vdev,
+                                ((uint64_t)proxy->guest_features[1]) << 32 |
+                                proxy->guest_features[0]);
+        }
+
         virtio_set_status(vdev, value & 0xff);
 
         if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
@@ -296,6 +426,66 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
             virtio_reset(vdev);
         }
         break;
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[1] = value;
+        break;
     case VIRTIO_MMIO_MAGIC_VALUE:
     case VIRTIO_MMIO_VERSION:
     case VIRTIO_MMIO_DEVICE_ID:
@@ -303,22 +493,31 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
     case VIRTIO_MMIO_DEVICE_FEATURES:
     case VIRTIO_MMIO_QUEUE_NUM_MAX:
     case VIRTIO_MMIO_INTERRUPT_STATUS:
+    case VIRTIO_MMIO_CONFIG_GENERATION:
         qemu_log_mask(LOG_GUEST_ERROR,
-                      "%s: write to readonly register\n",
-                      __func__);
+                      "%s: write to read-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: bad register offset\n", __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
     }
 }
 
-static const MemoryRegionOps virtio_mem_ops = {
+static const MemoryRegionOps virtio_legacy_mem_ops = {
     .read = virtio_mmio_read,
     .write = virtio_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static const MemoryRegionOps virtio_mem_ops = {
+    .read = virtio_mmio_read,
+    .write = virtio_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
@@ -352,15 +551,90 @@ static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f)
     qemu_put_be32(f, proxy->guest_page_shift);
 }
 
+static const VMStateDescription vmstate_virtio_mmio_queue_state = {
+    .name = "virtio_mmio/queue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(num, VirtIOMMIOQueue),
+        VMSTATE_BOOL(enabled, VirtIOMMIOQueue),
+        VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio_state_sub = {
+    .name = "virtio_mmio/state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2),
+        VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0,
+                             vmstate_virtio_mmio_queue_state,
+                             VirtIOMMIOQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_virtio_mmio = {
+    .name = "virtio_mmio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_mmio_state_sub,
+        NULL
+    }
+};
+
+static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+}
+
+static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+}
+
+static bool virtio_mmio_has_extra_state(DeviceState *opaque)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return !proxy->legacy;
+}
+
 static void virtio_mmio_reset(DeviceState *d)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    int i;
 
     virtio_mmio_stop_ioeventfd(proxy);
     virtio_bus_reset(&proxy->bus);
     proxy->host_features_sel = 0;
     proxy->guest_features_sel = 0;
     proxy->guest_page_shift = 0;
+
+    if (!proxy->legacy) {
+        proxy->guest_features[0] = proxy->guest_features[1] = 0;
+
+        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+            proxy->vqs[i].enabled = 0;
+            proxy->vqs[i].num = 0;
+            proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
+            proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
+            proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
+        }
+    }
 }
 
 static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
@@ -423,11 +697,22 @@ assign_error:
     return r;
 }
 
+static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!proxy->legacy) {
+        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
+    }
+}
+
 /* virtio-mmio device */
 
 static Property virtio_mmio_properties[] = {
     DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
                      format_transport_address, true),
+    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -439,8 +724,15 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
     qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS,
                         d, NULL);
     sysbus_init_irq(sbd, &proxy->irq);
-    memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_mem_ops, proxy,
-                          TYPE_VIRTIO_MMIO, 0x200);
+    if (proxy->legacy) {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_legacy_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    } else {
+        memory_region_init_io(&proxy->iomem, OBJECT(d),
+                              &virtio_mem_ops, proxy,
+                              TYPE_VIRTIO_MMIO, 0x200);
+    }
     sysbus_init_mmio(sbd, &proxy->iomem);
 }
 
@@ -511,9 +803,13 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data)
     k->notify = virtio_mmio_update_irq;
     k->save_config = virtio_mmio_save_config;
     k->load_config = virtio_mmio_load_config;
+    k->save_extra_state = virtio_mmio_save_extra_state;
+    k->load_extra_state = virtio_mmio_load_extra_state;
+    k->has_extra_state = virtio_mmio_has_extra_state;
     k->set_guest_notifiers = virtio_mmio_set_guest_notifiers;
     k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled;
     k->ioeventfd_assign = virtio_mmio_ioeventfd_assign;
+    k->pre_plugged = virtio_mmio_pre_plugged;
     k->has_variable_vring_alignment = true;
     bus_class->max_dev = 1;
     bus_class->get_dev_path = virtio_mmio_bus_get_dev_path;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2289536..be18a5c 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -166,6 +166,13 @@ typedef struct {
  *    The function pointer to hook different machine specific functions for
  *    parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
  *    machine specific topology fields, such as smp_dies for PCMachine.
+ * @hotplug_allowed:
+ *    If the hook is provided, then it'll be called for each device
+ *    hotplug to check whether the device hotplug is allowed.  Return
+ *    true to grant allowance or false to reject the hotplug.  When
+ *    false is returned, an error must be set to show the reason of
+ *    the rejection.  If the hook is not provided, all hotplug will be
+ *    allowed.
  */
 struct MachineClass {
     /*< private >*/
@@ -224,6 +231,8 @@ struct MachineClass {
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
+    bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
+                            Error **errp);
     CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
                                                          unsigned cpu_index);
     const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index de70b7a..aa123f8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -280,6 +280,7 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
                                  int required_for_version);
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev);
 HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
+bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
 /**
  * qdev_get_hotplug_handler: Get handler responsible for device wiring
  *
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 8fe5c2c..148df9c 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -615,6 +615,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
     /* create device */
     dev = DEVICE(object_new(driver));
 
+    /* Check whether the hotplug is allowed by the machine */
+    if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) {
+        /* Error must be set in the machine hook */
+        assert(err);
+        goto err_del_dev;
+    }
+
     if (bus) {
         qdev_set_parent_bus(dev, bus);
     } else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {