author     Peter Maydell <peter.maydell@linaro.org>   2016-07-21 20:12:37 +0100
committer  Peter Maydell <peter.maydell@linaro.org>   2016-07-21 20:12:37 +0100
commit     206d0c24361a083fbdcb2cc86fb75dc8b7f251a2 (patch)
tree       75dd4919f09372b4ef9928084ece6c7999ff76fc /hw
parent     7239247a2ba2fd1c269edda3b6fd816c5fd51baf (diff)
parent     bc38ee10fc26338e21c01485540f815be1f3db28 (diff)
download   qemu-206d0c24361a083fbdcb2cc86fb75dc8b7f251a2.zip
           qemu-206d0c24361a083fbdcb2cc86fb75dc8b7f251a2.tar.gz
           qemu-206d0c24361a083fbdcb2cc86fb75dc8b7f251a2.tar.bz2
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: new features, cleanups, fixes

- interrupt remapping for intel iommus
- a bunch of virtio cleanups
- fixes all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 21 Jul 2016 18:49:30 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (57 commits)
  intel_iommu: avoid unnamed fields
  virtio: Update migration docs
  virtio-gpu: Wrap in vmstate
  virtio-gpu: Use migrate_add_blocker for virgl migration blocking
  virtio-input: Wrap in vmstate
  9pfs: Wrap in vmstate
  virtio-serial: Wrap in vmstate
  virtio-net: Wrap in vmstate
  virtio-balloon: Wrap in vmstate
  virtio-rng: Wrap in vmstate
  virtio-blk: Wrap in vmstate
  virtio-scsi: Wrap in vmstate
  virtio: Migration helper function and macro
  virtio-serial: Remove old migration version support
  virtio-net: Remove old migration version support
  virtio-scsi: Replace HandleOutput typedef
  Revert "mirror: Workaround for unexpected iohandler events during completion"
  virtio-scsi: Call virtio_add_queue_aio
  virtio-blk: Call virtio_add_queue_aio
  virtio: Introduce virtio_add_queue_aio
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
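Most of the virtio changes in this pull follow one mechanical pattern: the per-device register_savevm()/unregister_savevm() calls are dropped, and each device instead describes its migration state through the new VMSTATE_VIRTIO_DEVICE macro, hooked up via dc->vmsd (the virtio-9p and virtio-blk hunks below are typical). The following is a minimal sketch of the converted boilerplate for a hypothetical "virtio-foo" device; VMSTATE_VIRTIO_DEVICE, virtio_load() and virtio_vmstate_save() are the helpers this series introduces, while everything named "foo" is illustrative only.

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

/* New-style load callback: the savevm version number is no longer an
 * argument; the device passes its fixed version to virtio_load(). */
static int virtio_foo_load(QEMUFile *f, void *opaque, size_t size)
{
    return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
}

/* Expands to a VMStateDescription named vmstate_virtio_foo wrapping the
 * callback above; virtio_vmstate_save() is the generic save path. */
VMSTATE_VIRTIO_DEVICE(foo, 1, virtio_foo_load, virtio_vmstate_save);

static void virtio_foo_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* Replaces the register_savevm()/unregister_savevm() pair that
     * previously lived in realize()/unrealize(). */
    dc->vmsd = &vmstate_virtio_foo;
}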
Diffstat (limited to 'hw')
-rw-r--r--  hw/9pfs/virtio-9p-device.c       14
-rw-r--r--  hw/alpha/typhoon.c                2
-rw-r--r--  hw/block/virtio-blk.c            18
-rw-r--r--  hw/char/virtio-serial-bus.c      62
-rw-r--r--  hw/display/virtio-gpu.c          36
-rw-r--r--  hw/i386/Makefile.objs             2
-rw-r--r--  hw/i386/acpi-build.c             43
-rw-r--r--  hw/i386/intel_iommu.c           437
-rw-r--r--  hw/i386/intel_iommu_internal.h   50
-rw-r--r--  hw/i386/kvm/pci-assign.c         10
-rw-r--r--  hw/i386/pc.c                      3
-rw-r--r--  hw/i386/trace-events              3
-rw-r--r--  hw/i386/x86-iommu.c             128
-rw-r--r--  hw/input/virtio-input.c          26
-rw-r--r--  hw/intc/ioapic.c                135
-rw-r--r--  hw/mem/nvdimm.c                   1
-rw-r--r--  hw/mips/gt64xxx_pci.c             2
-rw-r--r--  hw/misc/ivshmem.c                 4
-rw-r--r--  hw/net/virtio-net.c             102
-rw-r--r--  hw/pci-host/apb.c                15
-rw-r--r--  hw/pci-host/grackle.c             2
-rw-r--r--  hw/pci-host/prep.c                1
-rw-r--r--  hw/pci-host/versatile.c           1
-rw-r--r--  hw/pci/pci.c                     15
-rw-r--r--  hw/scsi/virtio-scsi.c            35
-rw-r--r--  hw/vfio/pci.c                    12
-rw-r--r--  hw/virtio/virtio-balloon.c       19
-rw-r--r--  hw/virtio/virtio-pci.c           10
-rw-r--r--  hw/virtio/virtio-rng.c           20
-rw-r--r--  hw/virtio/virtio.c               51
30 files changed, 941 insertions, 318 deletions
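The other major piece is VT-d interrupt remapping. The new x86-iommu base class (hw/i386/x86-iommu.c, added below) exposes an "intremap" property, build_dmar_q35() now advertises INT_REMAP and an IOAPIC device scope, and vtd_realize() rejects the full in-kernel irqchip. A hedged example of how a guest would be started with this support follows; the exact options are an assumption based on the properties added here, and the rest of the command line is omitted:

    qemu-system-x86_64 \
        -machine q35,accel=kvm,kernel-irqchip=split \
        -device intel-iommu,intremap=on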
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 494e85e..009b43f 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -97,14 +97,9 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)
g_free(cfg);
}
-static void virtio_9p_save(QEMUFile *f, void *opaque)
+static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size)
{
- virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
-static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id)
-{
- return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+ return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
}
static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
@@ -120,7 +115,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag);
virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size);
v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);
- register_savevm(dev, "virtio-9p", -1, 1, virtio_9p_save, virtio_9p_load, v);
out:
return;
@@ -133,7 +127,6 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp)
V9fsState *s = &v->state;
virtio_cleanup(vdev);
- unregister_savevm(dev, "virtio-9p", v);
v9fs_device_unrealize_common(s, errp);
}
@@ -175,6 +168,8 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
/* virtio-9p device */
+VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, virtio_vmstate_save);
+
static Property virtio_9p_properties[] = {
DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag),
DEFINE_PROP_STRING("fsdev", V9fsVirtioState, state.fsconf.fsdev_id),
@@ -187,6 +182,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_9p_properties;
+ dc->vmsd = &vmstate_virtio_9p;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_9p_device_realize;
vdc->unrealize = virtio_9p_device_unrealize;
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 97721b5..883db13 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -824,7 +824,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
int i;
dev = qdev_create(NULL, TYPE_TYPHOON_PCI_HOST_BRIDGE);
- qdev_init_nofail(dev);
s = TYPHOON_PCI_HOST_BRIDGE(dev);
phb = PCI_HOST_BRIDGE(dev);
@@ -889,6 +888,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
&s->pchip.reg_mem, &s->pchip.reg_io,
0, 64, TYPE_PCI_BUS);
phb->bus = b;
+ qdev_init_nofail(dev);
/* Host memory as seen from the PCI side, via the IOMMU. */
memory_region_init_iommu(&s->pchip.iommu, OBJECT(s), &typhoon_iommu_ops,
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 357ff90..475a822 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -798,7 +798,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
}
}
-static void virtio_blk_save(QEMUFile *f, void *opaque)
+static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
@@ -823,15 +823,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
qemu_put_sbyte(f, 0);
}
-static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIOBlock *s = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
- if (version_id != 2)
- return -EINVAL;
-
- return virtio_load(vdev, f, version_id);
+ return virtio_load(vdev, f, 2);
}
static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -880,7 +877,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
VirtIOBlock *s = VIRTIO_BLK(dev);
VirtIOBlkConf *conf = &s->conf;
Error *err = NULL;
- static int virtio_blk_id;
unsigned i;
if (!conf->conf.blk) {
@@ -914,7 +910,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
for (i = 0; i < conf->num_queues; i++) {
- virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+ virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output);
}
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
if (err != NULL) {
@@ -924,8 +920,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
- register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
- virtio_blk_save, virtio_blk_load, s);
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
@@ -940,7 +934,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL;
qemu_del_vm_change_state_handler(s->change);
- unregister_savevm(dev, "virtio-blk", s);
blockdev_mark_auto_del(s->blk);
virtio_cleanup(vdev);
}
@@ -958,6 +951,8 @@ static void virtio_blk_instance_init(Object *obj)
DEVICE(obj), NULL);
}
+VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save);
+
static Property virtio_blk_properties[] = {
DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
@@ -979,6 +974,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_blk_properties;
+ dc->vmsd = &vmstate_virtio_blk;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_blk_device_realize;
vdc->unrealize = virtio_blk_device_unrealize;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 6e5de6d..db57a38 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -594,12 +594,6 @@ static void vser_reset(VirtIODevice *vdev)
guest_reset(vser);
}
-static void virtio_serial_save(QEMUFile *f, void *opaque)
-{
- /* The virtio device */
- virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f)
{
VirtIOSerial *s = VIRTIO_SERIAL(vdev);
@@ -685,7 +679,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque)
s->post_load = NULL;
}
-static int fetch_active_ports_list(QEMUFile *f, int version_id,
+static int fetch_active_ports_list(QEMUFile *f,
VirtIOSerial *s, uint32_t nr_active_ports)
{
uint32_t i;
@@ -702,6 +696,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
/* Items in struct VirtIOSerialPort */
for (i = 0; i < nr_active_ports; i++) {
VirtIOSerialPort *port;
+ uint32_t elem_popped;
uint32_t id;
id = qemu_get_be32(f);
@@ -714,37 +709,29 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
s->post_load->connected[i].port = port;
s->post_load->connected[i].host_connected = qemu_get_byte(f);
- if (version_id > 2) {
- uint32_t elem_popped;
-
- qemu_get_be32s(f, &elem_popped);
- if (elem_popped) {
- qemu_get_be32s(f, &port->iov_idx);
- qemu_get_be64s(f, &port->iov_offset);
+ qemu_get_be32s(f, &elem_popped);
+ if (elem_popped) {
+ qemu_get_be32s(f, &port->iov_idx);
+ qemu_get_be64s(f, &port->iov_offset);
- port->elem =
- qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+ port->elem =
+ qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
- /*
- * Port was throttled on source machine. Let's
- * unthrottle it here so data starts flowing again.
- */
- virtio_serial_throttle_port(port, false);
- }
+ /*
+ * Port was throttled on source machine. Let's
+ * unthrottle it here so data starts flowing again.
+ */
+ virtio_serial_throttle_port(port, false);
}
}
timer_mod(s->post_load->timer, 1);
return 0;
}
-static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size)
{
- if (version_id > 3) {
- return -EINVAL;
- }
-
/* The virtio device */
- return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+ return virtio_load(VIRTIO_DEVICE(opaque), f, 3);
}
static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -756,10 +743,6 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
int ret;
uint32_t tmp;
- if (version_id < 2) {
- return 0;
- }
-
/* Unused */
qemu_get_be16s(f, (uint16_t *) &tmp);
qemu_get_be16s(f, (uint16_t *) &tmp);
@@ -781,7 +764,7 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
qemu_get_be32s(f, &nr_active_ports);
if (nr_active_ports) {
- ret = fetch_active_ports_list(f, version_id, s, nr_active_ports);
+ ret = fetch_active_ports_list(f, s, nr_active_ports);
if (ret) {
return ret;
}
@@ -1049,13 +1032,6 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
vser->post_load = NULL;
- /*
- * Register for the savevm section with the virtio-console name
- * to preserve backward compat
- */
- register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save,
- virtio_serial_load, vser);
-
QLIST_INSERT_HEAD(&vserdevices.devices, vser, next);
}
@@ -1086,8 +1062,6 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
QLIST_REMOVE(vser, next);
- unregister_savevm(dev, "virtio-console", vser);
-
g_free(vser->ivqs);
g_free(vser->ovqs);
g_free(vser->ports_map);
@@ -1100,6 +1074,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
virtio_cleanup(vdev);
}
+/* Note: 'console' is used for backwards compatibility */
+VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save);
+
static Property virtio_serial_properties[] = {
DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports,
31),
@@ -1115,6 +1092,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data)
QLIST_INIT(&vserdevices.devices);
dc->props = virtio_serial_properties;
+ dc->vmsd = &vmstate_virtio_console;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
vdc->realize = virtio_serial_device_realize;
vdc->unrealize = virtio_serial_device_unrealize;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 929c3c8..7fe6ed8 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -19,6 +19,7 @@
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-gpu.h"
#include "hw/virtio/virtio-bus.h"
+#include "migration/migration.h"
#include "qemu/log.h"
#include "qapi/error.h"
@@ -986,12 +987,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = {
},
};
-static const VMStateDescription vmstate_virtio_gpu_unmigratable = {
- .name = "virtio-gpu-with-virgl",
- .unmigratable = 1,
-};
-
-static void virtio_gpu_save(QEMUFile *f, void *opaque)
+static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size)
{
VirtIOGPU *g = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(g);
@@ -1021,7 +1017,7 @@ static void virtio_gpu_save(QEMUFile *f, void *opaque)
vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL);
}
-static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIOGPU *g = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(g);
@@ -1030,11 +1026,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id)
uint32_t resource_id, pformat;
int i, ret;
- if (version_id != VIRTIO_GPU_VM_VERSION) {
- return -EINVAL;
- }
-
- ret = virtio_load(vdev, f, version_id);
+ ret = virtio_load(vdev, f, VIRTIO_GPU_VM_VERSION);
if (ret) {
return ret;
}
@@ -1169,10 +1161,17 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
}
if (virtio_gpu_virgl_enabled(g->conf)) {
- vmstate_register(qdev, -1, &vmstate_virtio_gpu_unmigratable, g);
- } else {
- register_savevm(qdev, "virtio-gpu", -1, VIRTIO_GPU_VM_VERSION,
- virtio_gpu_save, virtio_gpu_load, g);
+ error_setg(&g->migration_blocker, "virgl is not yet migratable");
+ migrate_add_blocker(g->migration_blocker);
+ }
+}
+
+static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp)
+{
+ VirtIOGPU *g = VIRTIO_GPU(qdev);
+ if (g->migration_blocker) {
+ migrate_del_blocker(g->migration_blocker);
+ error_free(g->migration_blocker);
}
}
@@ -1220,6 +1219,9 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
#endif
}
+VMSTATE_VIRTIO_DEVICE(gpu, VIRTIO_GPU_VM_VERSION, virtio_gpu_load,
+ virtio_gpu_save);
+
static Property virtio_gpu_properties[] = {
DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
#ifdef CONFIG_VIRGL
@@ -1237,6 +1239,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
vdc->realize = virtio_gpu_device_realize;
+ vdc->unrealize = virtio_gpu_device_unrealize;
vdc->get_config = virtio_gpu_get_config;
vdc->set_config = virtio_gpu_set_config;
vdc->get_features = virtio_gpu_get_features;
@@ -1245,6 +1248,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
vdc->reset = virtio_gpu_reset;
dc->props = virtio_gpu_properties;
+ dc->vmsd = &vmstate_virtio_gpu;
}
static const TypeInfo virtio_gpu_info = {
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index b52d5b8..90e94ff 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,7 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
obj-y += multiboot.o
obj-y += pc.o pc_piix.o pc_q35.o
obj-y += pc_sysfw.o
-obj-y += intel_iommu.o
+obj-y += x86-iommu.o intel_iommu.o
obj-$(CONFIG_XEN) += ../xenpv/ xen/
obj-y += kvmvapic.o
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index fbba461..77c40d9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -52,13 +52,14 @@
#include "hw/i386/ich9.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci-host/q35.h"
-#include "hw/i386/intel_iommu.h"
+#include "hw/i386/x86-iommu.h"
#include "hw/timer/hpet.h"
#include "hw/acpi/aml-build.h"
#include "qapi/qmp/qint.h"
#include "qom/qom-qobject.h"
+#include "hw/i386/x86-iommu.h"
#include "hw/acpi/ipmi.h"
@@ -80,6 +81,9 @@
#define ACPI_BUILD_DPRINTF(fmt, ...)
#endif
+/* Default IOAPIC ID */
+#define ACPI_BUILD_IOAPIC_ID 0x0
+
typedef struct AcpiMcfgInfo {
uint64_t mcfg_base;
uint32_t mcfg_size;
@@ -383,7 +387,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
io_apic = acpi_data_push(table_data, sizeof *io_apic);
io_apic->type = ACPI_APIC_IO;
io_apic->length = sizeof(*io_apic);
-#define ACPI_BUILD_IOAPIC_ID 0x0
io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
io_apic->interrupt = cpu_to_le32(0);
@@ -2454,6 +2457,10 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL);
}
+/*
+ * VT-d spec 8.1 DMA Remapping Reporting Structure
+ * (version Oct. 2014 or later)
+ */
static void
build_dmar_q35(GArray *table_data, BIOSLinker *linker)
{
@@ -2461,19 +2468,38 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
AcpiTableDmar *dmar;
AcpiDmarHardwareUnit *drhd;
+ uint8_t dmar_flags = 0;
+ X86IOMMUState *iommu = x86_iommu_get_default();
+ AcpiDmarDeviceScope *scope = NULL;
+ /* Root complex IOAPIC use one path[0] only */
+ size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+
+ assert(iommu);
+ if (iommu->intr_supported) {
+ dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */
+ }
dmar = acpi_data_push(table_data, sizeof(*dmar));
dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
- dmar->flags = 0; /* No intr_remap for now */
+ dmar->flags = dmar_flags;
/* DMAR Remapping Hardware Unit Definition structure */
- drhd = acpi_data_push(table_data, sizeof(*drhd));
+ drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size);
drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
- drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */
+ drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size);
drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
drhd->pci_segment = cpu_to_le16(0);
drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+ /* Scope definition for the root-complex IOAPIC. See VT-d spec
+ * 8.3.1 (version Oct. 2014 or later). */
+ scope = &drhd->scope[0];
+ scope->entry_type = 0x03; /* Type: 0x03 for IOAPIC */
+ scope->length = ioapic_scope_size;
+ scope->enumeration_id = ACPI_BUILD_IOAPIC_ID;
+ scope->bus = Q35_PSEUDO_BUS_PLATFORM;
+ scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC);
+
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1, NULL, NULL);
}
@@ -2539,12 +2565,7 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
static bool acpi_has_iommu(void)
{
- bool ambiguous;
- Object *intel_iommu;
-
- intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
- &ambiguous);
- return intel_iommu && !ambiguous;
+ return !!x86_iommu_get_default();
}
static
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 464f2a0..28c31a2 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -20,18 +20,23 @@
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/i386/pc.h"
+#include "hw/boards.h"
+#include "hw/i386/x86-iommu.h"
+#include "hw/pci-host/q35.h"
+#include "sysemu/kvm.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
enum {
DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
- DEBUG_CACHE,
+ DEBUG_CACHE, DEBUG_IR,
};
#define VTD_DBGBIT(x) (1 << DEBUG_##x)
static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
@@ -192,7 +197,7 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
VTD_DPRINTF(CACHE, "global context_cache_gen=1");
while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
- for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
+ for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = vtd_bus->dev_as[devfn_it];
if (!vtd_as) {
continue;
@@ -901,6 +906,27 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
(s->root_extended ? "(extended)" : ""));
}
+static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
+ uint32_t index, uint32_t mask)
+{
+ x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
+}
+
+static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
+{
+ uint64_t value = 0;
+ value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
+ s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
+ s->intr_root = value & VTD_IRTA_ADDR_MASK;
+ s->intr_eime = value & VTD_IRTA_EIME;
+
+ /* Notify global invalidation */
+ vtd_iec_notify_all(s, true, 0, 0);
+
+ VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
+ s->intr_root, s->intr_size);
+}
+
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
s->context_cache_gen++;
@@ -964,7 +990,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
if (vtd_bus) {
devfn = VTD_SID_TO_DEVFN(source_id);
- for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
+ for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = vtd_bus->dev_as[devfn_it];
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
@@ -1139,6 +1165,16 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
}
+/* Set Interrupt Remap Table Pointer */
+static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
+{
+ VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
+
+ vtd_interrupt_remap_table_setup(s);
+ /* Ok - report back to driver */
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
+}
+
/* Handle Translation Enable/Disable */
static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
{
@@ -1158,6 +1194,22 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
}
}
+/* Handle Interrupt Remap Enable/Disable */
+static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
+{
+ VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
+
+ if (en) {
+ s->intr_enabled = true;
+ /* Ok - report back to driver */
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
+ } else {
+ s->intr_enabled = false;
+ /* Ok - report back to driver */
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
+ }
+}
+
/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
@@ -1178,6 +1230,14 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
/* Queued Invalidation Enable */
vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
}
+ if (val & VTD_GCMD_SIRTP) {
+ /* Set/update the interrupt remapping root-table pointer */
+ vtd_handle_gcmd_sirtp(s);
+ }
+ if (changed & VTD_GCMD_IRE) {
+ /* Interrupt remap enable/disable */
+ vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
+ }
}
/* Handle write to Context Command Register */
@@ -1363,6 +1423,21 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
return true;
}
+static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
+ inv_desc->iec.granularity,
+ inv_desc->iec.index,
+ inv_desc->iec.index_mask);
+
+ vtd_iec_notify_all(s, !inv_desc->iec.granularity,
+ inv_desc->iec.index,
+ inv_desc->iec.index_mask);
+
+ return true;
+}
+
static bool vtd_process_inv_desc(IntelIOMMUState *s)
{
VTDInvDesc inv_desc;
@@ -1402,6 +1477,15 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_IEC:
+ VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache "
+ "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+ inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
default:
VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
"hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -1830,6 +1914,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_update_fsts_ppf(s);
break;
+ case DMAR_IRTA_REG:
+ VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
+ ", size %d, val 0x%"PRIx64, addr, size, val);
+ if (size == 4) {
+ vtd_set_long(s, addr, val);
+ } else {
+ vtd_set_quad(s, addr, val);
+ }
+ break;
+
+ case DMAR_IRTA_REG_HI:
+ VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
+ ", size %d, val 0x%"PRIx64, addr, size, val);
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ break;
+
default:
VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
", size %d, val 0x%"PRIx64, addr, size, val);
@@ -1907,6 +2008,295 @@ static Property vtd_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+/* Read IRTE entry with specific index */
+static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
+ VTD_IR_TableEntry *entry, uint16_t sid)
+{
+ static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
+ {0xffff, 0xfffb, 0xfff9, 0xfff8};
+ dma_addr_t addr = 0x00;
+ uint16_t mask, source_id;
+ uint8_t bus, bus_max, bus_min;
+
+ addr = iommu->intr_root + index * sizeof(*entry);
+ if (dma_memory_read(&address_space_memory, addr, entry,
+ sizeof(*entry))) {
+ VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
+ " + %"PRIu16, iommu->intr_root, index);
+ return -VTD_FR_IR_ROOT_INVAL;
+ }
+
+ if (!entry->irte.present) {
+ VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
+ " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
+ index, le64_to_cpu(entry->data[1]),
+ le64_to_cpu(entry->data[0]));
+ return -VTD_FR_IR_ENTRY_P;
+ }
+
+ if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
+ entry->irte.__reserved_2) {
+ VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
+ " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
+ index, le64_to_cpu(entry->data[1]),
+ le64_to_cpu(entry->data[0]));
+ return -VTD_FR_IR_IRTE_RSVD;
+ }
+
+ if (sid != X86_IOMMU_SID_INVALID) {
+ /* Validate IRTE SID */
+ source_id = le32_to_cpu(entry->irte.source_id);
+ switch (entry->irte.sid_vtype) {
+ case VTD_SVT_NONE:
+ VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
+ break;
+
+ case VTD_SVT_ALL:
+ mask = vtd_svt_mask[entry->irte.sid_q];
+ if ((source_id & mask) != (sid & mask)) {
+ VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
+ "%d failed (reqid 0x%04x sid 0x%04x)", index,
+ sid, source_id);
+ return -VTD_FR_IR_SID_ERR;
+ }
+ break;
+
+ case VTD_SVT_BUS:
+ bus_max = source_id >> 8;
+ bus_min = source_id & 0xff;
+ bus = sid >> 8;
+ if (bus > bus_max || bus < bus_min) {
+ VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
+ "failed (bus %d outside %d-%d)", index, bus,
+ bus_min, bus_max);
+ return -VTD_FR_IR_SID_ERR;
+ }
+ break;
+
+ default:
+ VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
+ "%d", entry->irte.sid_vtype, index);
+ /* Take this as verification failure. */
+ return -VTD_FR_IR_SID_ERR;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Fetch IRQ information of specific IR index */
+static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
+ VTDIrq *irq, uint16_t sid)
+{
+ VTD_IR_TableEntry irte = {};
+ int ret = 0;
+
+ ret = vtd_irte_get(iommu, index, &irte, sid);
+ if (ret) {
+ return ret;
+ }
+
+ irq->trigger_mode = irte.irte.trigger_mode;
+ irq->vector = irte.irte.vector;
+ irq->delivery_mode = irte.irte.delivery_mode;
+ irq->dest = le32_to_cpu(irte.irte.dest_id);
+ if (!iommu->intr_eime) {
+#define VTD_IR_APIC_DEST_MASK (0xff00ULL)
+#define VTD_IR_APIC_DEST_SHIFT (8)
+ irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
+ VTD_IR_APIC_DEST_SHIFT;
+ }
+ irq->dest_mode = irte.irte.dest_mode;
+ irq->redir_hint = irte.irte.redir_hint;
+
+ VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
+ "deliver:%u,dest:%u,dest_mode:%u", index,
+ irq->trigger_mode, irq->vector, irq->delivery_mode,
+ irq->dest, irq->dest_mode);
+
+ return 0;
+}
+
+/* Generate one MSI message from VTDIrq info */
+static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
+{
+ VTD_MSIMessage msg = {};
+
+ /* Generate address bits */
+ msg.dest_mode = irq->dest_mode;
+ msg.redir_hint = irq->redir_hint;
+ msg.dest = irq->dest;
+ msg.__addr_head = cpu_to_le32(0xfee);
+ /* Keep this from original MSI address bits */
+ msg.__not_used = irq->msi_addr_last_bits;
+
+ /* Generate data bits */
+ msg.vector = irq->vector;
+ msg.delivery_mode = irq->delivery_mode;
+ msg.level = 1;
+ msg.trigger_mode = irq->trigger_mode;
+
+ msg_out->address = msg.msi_addr;
+ msg_out->data = msg.msi_data;
+}
+
+/* Interrupt remapping for MSI/MSI-X entry */
+static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint16_t sid)
+{
+ int ret = 0;
+ VTD_IR_MSIAddress addr;
+ uint16_t index;
+ VTDIrq irq = {};
+
+ assert(origin && translated);
+
+ if (!iommu || !iommu->intr_enabled) {
+ goto do_not_translate;
+ }
+
+ if (origin->address & VTD_MSI_ADDR_HI_MASK) {
+ VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
+ " during interrupt remapping: 0x%"PRIx32,
+ (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
+ VTD_MSI_ADDR_HI_SHIFT));
+ return -VTD_FR_IR_REQ_RSVD;
+ }
+
+ addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
+ if (le16_to_cpu(addr.addr.__head) != 0xfee) {
+ VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
+ "0x%"PRIx32, addr.data);
+ return -VTD_FR_IR_REQ_RSVD;
+ }
+
+ /* This is compatible mode. */
+ if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
+ goto do_not_translate;
+ }
+
+ index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
+
+#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff)
+#define VTD_IR_MSI_DATA_RESERVED (0xffff0000)
+
+ if (addr.addr.sub_valid) {
+ /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
+ index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
+ }
+
+ ret = vtd_remap_irq_get(iommu, index, &irq, sid);
+ if (ret) {
+ return ret;
+ }
+
+ if (addr.addr.sub_valid) {
+ VTD_DPRINTF(IR, "received MSI interrupt");
+ if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
+ VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
+ "interrupt remappable entry: 0x%"PRIx32,
+ origin->data);
+ return -VTD_FR_IR_REQ_RSVD;
+ }
+ } else {
+ uint8_t vector = origin->data & 0xff;
+ VTD_DPRINTF(IR, "received IOAPIC interrupt");
+ /* IOAPIC entry vector should be aligned with IRTE vector
+ * (see vt-d spec 5.1.5.1). */
+ if (vector != irq.vector) {
+ VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
+ "entry: %d, IRTE: %d, index: %d",
+ vector, irq.vector, index);
+ }
+ }
+
+ /*
+ * We'd better keep the last two bits, assuming that guest OS
+ * might modify it. Keep it does not hurt after all.
+ */
+ irq.msi_addr_last_bits = addr.addr.__not_care;
+
+ /* Translate VTDIrq to MSI message */
+ vtd_generate_msi_message(&irq, translated);
+
+ VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
+ "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
+ translated->address, translated->data);
+ return 0;
+
+do_not_translate:
+ memcpy(translated, origin, sizeof(*origin));
+ return 0;
+}
+
+static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
+ MSIMessage *dst, uint16_t sid)
+{
+ return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
+ src, dst, sid);
+}
+
+static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
+ return MEMTX_OK;
+}
+
+static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size,
+ MemTxAttrs attrs)
+{
+ int ret = 0;
+ MSIMessage from = {}, to = {};
+ uint16_t sid = X86_IOMMU_SID_INVALID;
+
+ from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
+ from.data = (uint32_t) value;
+
+ if (!attrs.unspecified) {
+ /* We have explicit Source ID */
+ sid = attrs.requester_id;
+ }
+
+ ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
+ if (ret) {
+ /* TODO: report error */
+ VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
+ " data 0x%"PRIx32, from.address, from.data);
+ /* Drop this interrupt */
+ return MEMTX_ERROR;
+ }
+
+ VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
+ " for device sid 0x%04x",
+ to.address, to.data, sid);
+
+ if (dma_memory_write(&address_space_memory, to.address,
+ &to.data, size)) {
+ VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64
+ " value 0x%"PRIx32, to.address, to.data);
+ }
+
+ return MEMTX_OK;
+}
+
+static const MemoryRegionOps vtd_mem_ir_ops = {
+ .read_with_attrs = vtd_mem_ir_read,
+ .write_with_attrs = vtd_mem_ir_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
@@ -1916,7 +2306,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
if (!vtd_bus) {
/* No corresponding free() */
- vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
+ vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
+ X86_IOMMU_PCI_DEVFN_MAX);
vtd_bus->bus = bus;
key = (uintptr_t)bus;
g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus);
@@ -1933,6 +2324,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
&s->iommu_ops, "intel_iommu", UINT64_MAX);
+ memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
+ &vtd_mem_ir_ops, s, "intel_iommu_ir",
+ VTD_INTERRUPT_ADDR_SIZE);
+ memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
+ &vtd_dev_as->iommu_ir);
address_space_init(&vtd_dev_as->as,
&vtd_dev_as->iommu, "intel_iommu");
}
@@ -1944,6 +2340,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
*/
static void vtd_init(IntelIOMMUState *s)
{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
memset(s->csr, 0, DMAR_REG_SIZE);
memset(s->wmask, 0, DMAR_REG_SIZE);
memset(s->w1cmask, 0, DMAR_REG_SIZE);
@@ -1965,6 +2363,10 @@ static void vtd_init(IntelIOMMUState *s)
VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
+ if (x86_iommu->intr_supported) {
+ s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV;
+ }
+
vtd_reset_context_cache(s);
vtd_reset_iotlb(s);
@@ -2014,6 +2416,11 @@ static void vtd_init(IntelIOMMUState *s)
/* Fault Recording Registers, 128-bit */
vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
+
+ /*
+ * Interrupt remapping registers.
+ */
+ vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
}
/* Should not reset address_spaces when reset because devices will still use
@@ -2032,7 +2439,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
IntelIOMMUState *s = opaque;
VTDAddressSpace *vtd_as;
- assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
+ assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX);
vtd_as = vtd_find_add_as(s, bus, devfn);
return &vtd_as->as;
@@ -2040,8 +2447,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
static void vtd_realize(DeviceState *dev, Error **errp)
{
- PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+ PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+ PCIBus *bus = pcms->bus;
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
VTD_DPRINTF(GENERAL, "");
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
@@ -2056,22 +2465,34 @@ static void vtd_realize(DeviceState *dev, Error **errp)
vtd_init(s);
sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+ /* Pseudo address space under root PCI bus. */
+ pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+
+ /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
+ if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
+ !kvm_irqchip_is_split()) {
+ error_report("Intel Interrupt Remapping cannot work with "
+ "kernel-irqchip=on, please use 'split|off'.");
+ exit(1);
+ }
}
static void vtd_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);
dc->reset = vtd_reset;
- dc->realize = vtd_realize;
dc->vmsd = &vtd_vmstate;
dc->props = vtd_properties;
dc->hotpluggable = false;
+ x86_class->realize = vtd_realize;
+ x86_class->int_remap = vtd_int_remap;
}
static const TypeInfo vtd_info = {
.name = TYPE_INTEL_IOMMU_DEVICE,
- .parent = TYPE_SYS_BUS_DEVICE,
+ .parent = TYPE_X86_IOMMU_DEVICE,
.instance_size = sizeof(IntelIOMMUState),
.class_init = vtd_class_init,
};
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c..0829a50 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -110,6 +110,8 @@
/* Interrupt Address Range */
#define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL
#define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL
+#define VTD_INTERRUPT_ADDR_SIZE (VTD_INTERRUPT_ADDR_LAST - \
+ VTD_INTERRUPT_ADDR_FIRST + 1)
/* The shift of source_id in the key of IOTLB hash table */
#define VTD_IOTLB_SID_SHIFT 36
@@ -172,10 +174,19 @@
#define VTD_RTADDR_RTT (1ULL << 11)
#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
+/* IRTA_REG */
+#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IRTA_EIME (1ULL << 11)
+#define VTD_IRTA_SIZE_MASK (0xfULL)
+
/* ECAP_REG */
/* (offset >> 4) << 8 */
#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4)
#define VTD_ECAP_QI (1ULL << 1)
+/* Interrupt Remapping support */
+#define VTD_ECAP_IR (1ULL << 3)
+#define VTD_ECAP_EIM (1ULL << 4)
+#define VTD_ECAP_MHMV (15ULL << 20)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -265,6 +276,19 @@ typedef enum VTDFaultReason {
* context-entry.
*/
VTD_FR_CONTEXT_ENTRY_TT,
+
+ /* Interrupt remapping transition faults */
+ VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved
+ * fields set */
+ VTD_FR_IR_INDEX_OVER = 0x21, /* Index value greater than max */
+ VTD_FR_IR_ENTRY_P = 0x22, /* Present (P) not set in IRTE */
+ VTD_FR_IR_ROOT_INVAL = 0x23, /* IR Root table invalid */
+ VTD_FR_IR_IRTE_RSVD = 0x24, /* IRTE Rsvd field non-zero with
+ * Present flag set */
+ VTD_FR_IR_REQ_COMPAT = 0x25, /* Encountered compatible IR
+ * request while disabled */
+ VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
+
/* This is not a normal fault reason. We use this to indicate some faults
* that are not referenced by the VT-d specification.
* Fault event with such reason should not be recorded.
@@ -275,17 +299,35 @@ typedef enum VTDFaultReason {
#define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL
+/* Interrupt Entry Cache Invalidation Descriptor: VT-d 6.5.2.7. */
+struct VTDInvDescIEC {
+ uint32_t type:4; /* Should always be 0x4 */
+ uint32_t granularity:1; /* If set, it's global IR invalidation */
+ uint32_t resved_1:22;
+ uint32_t index_mask:5; /* 2^N for continuous int invalidation */
+ uint32_t index:16; /* Start index to invalidate */
+ uint32_t reserved_2:16;
+};
+typedef struct VTDInvDescIEC VTDInvDescIEC;
+
/* Queued Invalidation Descriptor */
-struct VTDInvDesc {
- uint64_t lo;
- uint64_t hi;
+union VTDInvDesc {
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ };
+ union {
+ VTDInvDescIEC iec;
+ };
};
-typedef struct VTDInvDesc VTDInvDesc;
+typedef union VTDInvDesc VTDInvDesc;
/* Masks for struct VTDInvDesc */
#define VTD_INV_DESC_TYPE 0xf
#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
#define VTD_INV_DESC_IOTLB 0x2
+#define VTD_INV_DESC_IEC 0x4 /* Interrupt Entry Cache
+ Invalidate Descriptor */
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 1a429e5..8238fbc 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -974,10 +974,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
}
if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
- MSIMessage msg = msi_get_message(pci_dev, 0);
int virq;
- virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+ virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev);
if (virq < 0) {
perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
return;
@@ -1016,6 +1015,7 @@ static void assigned_dev_update_msi_msg(PCIDevice *pci_dev)
kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0],
msi_get_message(pci_dev, 0), pci_dev);
+ kvm_irqchip_commit_routes(kvm_state);
}
static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
@@ -1042,7 +1042,6 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
uint16_t entries_nr = 0;
int i, r = 0;
MSIXTableEntry *entry = adev->msix_table;
- MSIMessage msg;
/* Get the usable entry number for allocating */
for (i = 0; i < adev->msix_max; i++, entry++) {
@@ -1079,9 +1078,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
continue;
}
- msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32);
- msg.data = entry->data;
- r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+ r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev);
if (r < 0) {
return r;
}
@@ -1606,6 +1603,7 @@ static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr,
if (ret) {
error_report("Error updating irq routing entry (%d)", ret);
}
+ kvm_irqchip_commit_routes(kvm_state);
}
}
}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index ac7a4d5..9e3c70f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1473,6 +1473,9 @@ void pc_memory_init(PCMachineState *pcms,
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
pcms->fw_cfg = fw_cfg;
+
+ /* Init default IOAPIC address space */
+ pcms->ioapic_as = &address_space_memory;
}
qemu_irq pc_allocate_cpu_irq(void)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ea77bc2..b4882c1 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -10,3 +10,6 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad
# hw/i386/pc.c
mhp_pc_dimm_assigned_slot(int slot) "0x%d"
mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
+
+# hw/i386/x86-iommu.c
+x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
new file mode 100644
index 0000000..ce26b2a
--- /dev/null
+++ b/hw/i386/x86-iommu.c
@@ -0,0 +1,128 @@
+/*
+ * QEMU emulation of common X86 IOMMU
+ *
+ * Copyright (C) 2016 Peter Xu, Red Hat <peterx@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/boards.h"
+#include "hw/i386/x86-iommu.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+void x86_iommu_iec_register_notifier(X86IOMMUState *iommu,
+ iec_notify_fn fn, void *data)
+{
+ IEC_Notifier *notifier = g_new0(IEC_Notifier, 1);
+
+ notifier->iec_notify = fn;
+ notifier->private = data;
+
+ QLIST_INSERT_HEAD(&iommu->iec_notifiers, notifier, list);
+}
+
+void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global,
+ uint32_t index, uint32_t mask)
+{
+ IEC_Notifier *notifier;
+
+ trace_x86_iommu_iec_notify(global, index, mask);
+
+ QLIST_FOREACH(notifier, &iommu->iec_notifiers, list) {
+ if (notifier->iec_notify) {
+ notifier->iec_notify(notifier->private, global,
+ index, mask);
+ }
+ }
+}
+
+/* Default X86 IOMMU device */
+static X86IOMMUState *x86_iommu_default = NULL;
+
+static void x86_iommu_set_default(X86IOMMUState *x86_iommu)
+{
+ assert(x86_iommu);
+
+ if (x86_iommu_default) {
+ error_report("QEMU does not support multiple vIOMMUs "
+ "for x86 yet.");
+ exit(1);
+ }
+
+ x86_iommu_default = x86_iommu;
+}
+
+X86IOMMUState *x86_iommu_get_default(void)
+{
+ return x86_iommu_default;
+}
+
+static void x86_iommu_realize(DeviceState *dev, Error **errp)
+{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+ X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev);
+ QLIST_INIT(&x86_iommu->iec_notifiers);
+ if (x86_class->realize) {
+ x86_class->realize(dev, errp);
+ }
+ x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
+}
+
+static void x86_iommu_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ dc->realize = x86_iommu_realize;
+}
+
+static bool x86_iommu_intremap_prop_get(Object *o, Error **errp)
+{
+ X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+ return s->intr_supported;
+}
+
+static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
+{
+ X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+ s->intr_supported = value;
+}
+
+static void x86_iommu_instance_init(Object *o)
+{
+ X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+
+ /* By default, do not support IR */
+ s->intr_supported = false;
+ object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
+ x86_iommu_intremap_prop_set, NULL);
+}
+
+static const TypeInfo x86_iommu_info = {
+ .name = TYPE_X86_IOMMU_DEVICE,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_init = x86_iommu_instance_init,
+ .instance_size = sizeof(X86IOMMUState),
+ .class_init = x86_iommu_class_init,
+ .class_size = sizeof(X86IOMMUClass),
+ .abstract = true,
+};
+
+static void x86_iommu_register_types(void)
+{
+ type_register_static(&x86_iommu_info);
+}
+
+type_init(x86_iommu_register_types)
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index edf6990..a87fd68 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -217,26 +217,14 @@ static void virtio_input_reset(VirtIODevice *vdev)
}
}
-static void virtio_input_save(QEMUFile *f, void *opaque)
-{
- VirtIOInput *vinput = opaque;
- VirtIODevice *vdev = VIRTIO_DEVICE(vinput);
-
- virtio_save(vdev, f);
-}
-
-static int virtio_input_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_input_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIOInput *vinput = opaque;
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vinput);
VirtIODevice *vdev = VIRTIO_DEVICE(vinput);
int ret;
- if (version_id != VIRTIO_INPUT_VM_VERSION) {
- return -EINVAL;
- }
-
- ret = virtio_load(vdev, f, version_id);
+ ret = virtio_load(vdev, f, VIRTIO_INPUT_VM_VERSION);
if (ret) {
return ret;
}
@@ -280,20 +268,14 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp)
vinput->cfg_size);
vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt);
vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts);
-
- register_savevm(dev, "virtio-input", -1, VIRTIO_INPUT_VM_VERSION,
- virtio_input_save, virtio_input_load, vinput);
}
static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev);
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
- VirtIOInput *vinput = VIRTIO_INPUT(dev);
Error *local_err = NULL;
- unregister_savevm(dev, "virtio-input", vinput);
-
if (vic->unrealize) {
vic->unrealize(dev, &local_err);
if (local_err) {
@@ -304,6 +286,9 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
virtio_cleanup(vdev);
}
+VMSTATE_VIRTIO_DEVICE(input, VIRTIO_INPUT_VM_VERSION, virtio_input_load,
+ virtio_vmstate_save);
+
static Property virtio_input_properties[] = {
DEFINE_PROP_STRING("serial", VirtIOInput, serial),
DEFINE_PROP_END_OF_LIST(),
@@ -315,6 +300,7 @@ static void virtio_input_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_input_properties;
+ dc->vmsd = &vmstate_virtio_input;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
vdc->realize = virtio_input_device_realize;
vdc->unrealize = virtio_input_device_unrealize;
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 273bb08..2d3282a 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -29,6 +29,9 @@
#include "hw/i386/ioapic_internal.h"
#include "include/hw/pci/msi.h"
#include "sysemu/kvm.h"
+#include "target-i386/cpu.h"
+#include "hw/i386/apic-msidef.h"
+#include "hw/i386/x86-iommu.h"
//#define DEBUG_IOAPIC
@@ -48,16 +51,56 @@ static IOAPICCommonState *ioapics[MAX_IOAPICS];
/* global variable from ioapic_common.c */
extern int ioapic_no;
+struct ioapic_entry_info {
+ /* fields parsed from IOAPIC entries */
+ uint8_t masked;
+ uint8_t trig_mode;
+ uint16_t dest_idx;
+ uint8_t dest_mode;
+ uint8_t delivery_mode;
+ uint8_t vector;
+
+ /* MSI message generated from above parsed fields */
+ uint32_t addr;
+ uint32_t data;
+};
+
+static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
+ info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
+ /*
+ * By default, this would be dest_id[8] + reserved[8]. When IR
+ * is enabled, this would be interrupt_index[15] +
+ * interrupt_format[1]. This field never means anything, but
+ * only used to generate corresponding MSI.
+ */
+ info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
+ info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
+ info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
+ & IOAPIC_DM_MASK;
+ if (info->delivery_mode == IOAPIC_DM_EXTINT) {
+ info->vector = pic_read_irq(isa_pic);
+ } else {
+ info->vector = entry & IOAPIC_VECTOR_MASK;
+ }
+
+ info->addr = APIC_DEFAULT_ADDRESS | \
+ (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
+ (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
+ info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
+ (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
+ (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
+}
+
static void ioapic_service(IOAPICCommonState *s)
{
+ AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as;
+ struct ioapic_entry_info info;
uint8_t i;
- uint8_t trig_mode;
- uint8_t vector;
- uint8_t delivery_mode;
uint32_t mask;
uint64_t entry;
- uint8_t dest;
- uint8_t dest_mode;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
mask = 1 << i;
@@ -65,26 +108,18 @@ static void ioapic_service(IOAPICCommonState *s)
int coalesce = 0;
entry = s->ioredtbl[i];
- if (!(entry & IOAPIC_LVT_MASKED)) {
- trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1);
- dest = entry >> IOAPIC_LVT_DEST_SHIFT;
- dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
- delivery_mode =
- (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK;
- if (trig_mode == IOAPIC_TRIGGER_EDGE) {
+ ioapic_entry_parse(entry, &info);
+ if (!info.masked) {
+ if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
s->irr &= ~mask;
} else {
coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
}
- if (delivery_mode == IOAPIC_DM_EXTINT) {
- vector = pic_read_irq(isa_pic);
- } else {
- vector = entry & IOAPIC_VECTOR_MASK;
- }
+
#ifdef CONFIG_KVM
if (kvm_irqchip_is_split()) {
- if (trig_mode == IOAPIC_TRIGGER_EDGE) {
+ if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
kvm_set_irq(kvm_state, i, 1);
kvm_set_irq(kvm_state, i, 0);
} else {
@@ -97,8 +132,11 @@ static void ioapic_service(IOAPICCommonState *s)
#else
(void)coalesce;
#endif
- apic_deliver_irq(dest, dest_mode, delivery_mode, vector,
- trig_mode);
+ /* No matter whether IR is enabled, we translate
+ * the IOAPIC message into a MSI one, and its
+ * address space will decide whether we need a
+ * translation. */
+ stl_le_phys(ioapic_as, info.addr, info.data);
}
}
}
@@ -149,30 +187,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
if (kvm_irqchip_is_split()) {
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- uint64_t entry = s->ioredtbl[i];
- uint8_t trig_mode;
- uint8_t delivery_mode;
- uint8_t dest;
- uint8_t dest_mode;
- uint64_t pin_polarity;
MSIMessage msg;
-
- trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1);
- dest = entry >> IOAPIC_LVT_DEST_SHIFT;
- dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
- pin_polarity = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
- delivery_mode =
- (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK;
-
- msg.address = APIC_DEFAULT_ADDRESS;
- msg.address |= dest_mode << 2;
- msg.address |= dest << 12;
-
- msg.data = entry & IOAPIC_VECTOR_MASK;
- msg.data |= delivery_mode << APIC_DELIVERY_MODE_SHIFT;
- msg.data |= pin_polarity << APIC_POLARITY_SHIFT;
- msg.data |= trig_mode << APIC_TRIG_MODE_SHIFT;
-
+ struct ioapic_entry_info info;
+ ioapic_entry_parse(s->ioredtbl[i], &info);
+ msg.address = info.addr;
+ msg.data = info.data;
kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
}
kvm_irqchip_commit_routes(kvm_state);
@@ -180,6 +199,16 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
#endif
}
+#ifdef CONFIG_KVM
+static void ioapic_iec_notifier(void *private, bool global,
+ uint32_t index, uint32_t mask)
+{
+ IOAPICCommonState *s = (IOAPICCommonState *)private;
+ /* For simplicity, we just update all the routes */
+ ioapic_update_kvm_routes(s);
+}
+#endif
+
void ioapic_eoi_broadcast(int vector)
{
IOAPICCommonState *s;
@@ -336,6 +365,24 @@ static const MemoryRegionOps ioapic_io_ops = {
.endianness = DEVICE_NATIVE_ENDIAN,
};
+static void ioapic_machine_done_notify(Notifier *notifier, void *data)
+{
+#ifdef CONFIG_KVM
+ IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
+ machine_done);
+
+ if (kvm_irqchip_is_split()) {
+ X86IOMMUState *iommu = x86_iommu_get_default();
+ if (iommu) {
+ /* Register this IOAPIC with IOMMU IEC notifier, so that
+ * when there are IR invalidates, we can be notified to
+ * update kernel IR cache. */
+ x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
+ }
+ }
+#endif
+}
+
static void ioapic_realize(DeviceState *dev, Error **errp)
{
IOAPICCommonState *s = IOAPIC_COMMON(dev);
@@ -346,6 +393,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
ioapics[ioapic_no] = s;
+ s->machine_done.notify = ioapic_machine_done_notify;
+ qemu_add_machine_init_done_notifier(&s->machine_done);
}
static void ioapic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 81896c0..7895805 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -98,6 +98,7 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
"small to contain nvdimm label (0x%" PRIx64 ") and "
"aligned PMEM (0x%" PRIx64 ")",
path, memory_region_size(mr), nvdimm->label_size, align);
+ g_free(path);
return;
}
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 3f4523d..4811843 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -1167,7 +1167,6 @@ PCIBus *gt64120_register(qemu_irq *pic)
DeviceState *dev;
dev = qdev_create(NULL, TYPE_GT64120_PCI_HOST_BRIDGE);
- qdev_init_nofail(dev);
d = GT64120_PCI_HOST_BRIDGE(dev);
phb = PCI_HOST_BRIDGE(dev);
memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX);
@@ -1178,6 +1177,7 @@ PCIBus *gt64120_register(qemu_irq *pic)
&d->pci0_mem,
get_system_io(),
PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS);
+ qdev_init_nofail(dev);
memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000);
pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "gt64120_pci");
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 7e7c843..40a2ebc 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -322,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
if (ret < 0) {
return ret;
}
+ kvm_irqchip_commit_routes(kvm_state);
return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
}
@@ -441,13 +442,12 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
Error **errp)
{
PCIDevice *pdev = PCI_DEVICE(s);
- MSIMessage msg = msix_get_message(pdev, vector);
int ret;
IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
assert(!s->msi_vectors[vector].pdev);
- ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
+ ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
if (ret < 0) {
error_setg(errp, "kvm_irqchip_add_msi_route failed");
return;
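The two ivshmem hunks show the pattern this series applies tree-wide: kvm_irqchip_add_msi_route() now takes the MSI vector number and the owning PCI device instead of a pre-fetched MSIMessage, and route-table updates are no longer flushed implicitly, so callers issue an explicit kvm_irqchip_commit_routes() before arming an irqfd. A minimal caller sketch under those assumptions (setup_msi_vector_sketch() is hypothetical; the kvm_* calls are the post-series interfaces):

    #include "qemu/osdep.h"
    #include "hw/pci/pci.h"
    #include "sysemu/kvm.h"

    static int setup_msi_vector_sketch(PCIDevice *pdev, int vector)
    {
        /* The routing code now resolves the MSI address/data itself, so the
         * caller only passes the vector number and the owning device. */
        int virq = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
        if (virq < 0) {
            return virq;
        }

        /* Route-table updates are batched; flush them to KVM before
         * attaching an irqfd / guest notifier to the new virq. */
        kvm_irqchip_commit_routes(kvm_state);
        return virq;
    }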
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 56d8506..01f1351 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1492,7 +1492,7 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
virtio_net_set_queues(n);
}
-static void virtio_net_save(QEMUFile *f, void *opaque)
+static void virtio_net_save(QEMUFile *f, void *opaque, size_t size)
{
VirtIONet *n = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
@@ -1538,15 +1538,12 @@ static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
}
}
-static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_net_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIONet *n = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
- if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
- return -EINVAL;
-
- return virtio_load(vdev, f, version_id);
+ return virtio_load(vdev, f, VIRTIO_NET_VM_VERSION);
}
static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -1562,68 +1559,49 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1));
- if (version_id >= 3)
- n->status = qemu_get_be16(f);
+ n->status = qemu_get_be16(f);
- if (version_id >= 4) {
- if (version_id < 8) {
- n->promisc = qemu_get_be32(f);
- n->allmulti = qemu_get_be32(f);
- } else {
- n->promisc = qemu_get_byte(f);
- n->allmulti = qemu_get_byte(f);
- }
- }
+ n->promisc = qemu_get_byte(f);
+ n->allmulti = qemu_get_byte(f);
- if (version_id >= 5) {
- n->mac_table.in_use = qemu_get_be32(f);
- /* MAC_TABLE_ENTRIES may be different from the saved image */
- if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
- qemu_get_buffer(f, n->mac_table.macs,
- n->mac_table.in_use * ETH_ALEN);
- } else {
- int64_t i;
-
- /* Overflow detected - can happen if source has a larger MAC table.
- * We simply set overflow flag so there's no need to maintain the
- * table of addresses, discard them all.
- * Note: 64 bit math to avoid integer overflow.
- */
- for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
- qemu_get_byte(f);
- }
- n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
- n->mac_table.in_use = 0;
+ n->mac_table.in_use = qemu_get_be32(f);
+ /* MAC_TABLE_ENTRIES may be different from the saved image */
+ if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
+ qemu_get_buffer(f, n->mac_table.macs,
+ n->mac_table.in_use * ETH_ALEN);
+ } else {
+ int64_t i;
+
+ /* Overflow detected - can happen if source has a larger MAC table.
+ * We simply set overflow flag so there's no need to maintain the
+ * table of addresses, discard them all.
+ * Note: 64 bit math to avoid integer overflow.
+ */
+ for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
+ qemu_get_byte(f);
}
+ n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
+ n->mac_table.in_use = 0;
}
- if (version_id >= 6)
- qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
+ qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
- if (version_id >= 7) {
- if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
- error_report("virtio-net: saved image requires vnet_hdr=on");
- return -1;
- }
+ if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
+ error_report("virtio-net: saved image requires vnet_hdr=on");
+ return -1;
}
- if (version_id >= 9) {
- n->mac_table.multi_overflow = qemu_get_byte(f);
- n->mac_table.uni_overflow = qemu_get_byte(f);
- }
+ n->mac_table.multi_overflow = qemu_get_byte(f);
+ n->mac_table.uni_overflow = qemu_get_byte(f);
- if (version_id >= 10) {
- n->alluni = qemu_get_byte(f);
- n->nomulti = qemu_get_byte(f);
- n->nouni = qemu_get_byte(f);
- n->nobcast = qemu_get_byte(f);
- }
+ n->alluni = qemu_get_byte(f);
+ n->nomulti = qemu_get_byte(f);
+ n->nouni = qemu_get_byte(f);
+ n->nobcast = qemu_get_byte(f);
- if (version_id >= 11) {
- if (qemu_get_byte(f) && !peer_has_ufo(n)) {
- error_report("virtio-net: saved image requires TUN_F_UFO support");
- return -1;
- }
+ if (qemu_get_byte(f) && !peer_has_ufo(n)) {
+ error_report("virtio-net: saved image requires TUN_F_UFO support");
+ return -1;
}
if (n->max_queues > 1) {
@@ -1809,8 +1787,6 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
nc->rxfilter_notify_enabled = 1;
n->qdev = dev;
- register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
- virtio_net_save, virtio_net_load, n);
}
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
@@ -1822,8 +1798,6 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
/* This will stop vhost backend if appropriate. */
virtio_net_set_status(vdev, 0);
- unregister_savevm(dev, "virtio-net", n);
-
g_free(n->netclient_name);
n->netclient_name = NULL;
g_free(n->netclient_type);
@@ -1858,6 +1832,9 @@ static void virtio_net_instance_init(Object *obj)
DEVICE(n), NULL);
}
+VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, virtio_net_load,
+ virtio_net_save);
+
static Property virtio_net_properties[] = {
DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
@@ -1912,6 +1889,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_net_properties;
+ dc->vmsd = &vmstate_virtio_net;
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
vdc->realize = virtio_net_device_realize;
vdc->unrealize = virtio_net_device_unrealize;
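virtio-net is converted like the other virtio devices in this series: the save/load callbacks switch to the (QEMUFile *, void *, size_t) signature, register_savevm()/unregister_savevm() go away, and VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, ...) plus dc->vmsd take over registration. The macro itself is introduced elsewhere in the series ("virtio: Migration helper function and macro"); the following is only a sketch of the shape it must have to accept those callbacks, not its actual definition:

    /* Rough sketch (not the QEMU definition): a VMStateDescription whose
     * single field funnels the whole device state through the given
     * load/save callbacks, which in turn call virtio_load()/virtio_save(). */
    #include "qemu/osdep.h"
    #include "migration/vmstate.h"

    #define VMSTATE_VIRTIO_DEVICE_SKETCH(devname, v, getf, putf)            \
        static const VMStateInfo vmstate_info_virtio_ ## devname = {        \
            .name = "virtio-" #devname,                                     \
            .get  = getf,   /* int  (*)(QEMUFile *, void *, size_t) */      \
            .put  = putf,   /* void (*)(QEMUFile *, void *, size_t) */      \
        };                                                                   \
        const VMStateDescription vmstate_virtio_ ## devname = {             \
            .name = "virtio-" #devname,                                     \
            .version_id = v,                                                 \
            .minimum_version_id = v,                                         \
            .fields = (VMStateField[]) {                                     \
                {                                                            \
                    .name  = "virtio",                                       \
                    .info  = &vmstate_info_virtio_ ## devname,               \
                    .flags = VMS_SINGLE,                                     \
                },                                                           \
                VMSTATE_END_OF_LIST()                                        \
            },                                                               \
        }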
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index babbbef..16587f8 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -670,6 +670,13 @@ PCIBus *pci_apb_init(hwaddr special_base,
/* Ultrasparc PBM main bus */
dev = qdev_create(NULL, TYPE_APB);
+ d = APB_DEVICE(dev);
+ phb = PCI_HOST_BRIDGE(dev);
+ phb->bus = pci_register_bus(DEVICE(phb), "pci",
+ pci_apb_set_irq, pci_pbm_map_irq, d,
+ &d->pci_mmio,
+ get_system_io(),
+ 0, 32, TYPE_PCI_BUS);
qdev_init_nofail(dev);
s = SYS_BUS_DEVICE(dev);
/* apb_config */
@@ -678,18 +685,10 @@ PCIBus *pci_apb_init(hwaddr special_base,
sysbus_mmio_map(s, 1, special_base + 0x1000000ULL);
/* pci_ioport */
sysbus_mmio_map(s, 2, special_base + 0x2000000ULL);
- d = APB_DEVICE(dev);
memory_region_init(&d->pci_mmio, OBJECT(s), "pci-mmio", 0x100000000ULL);
memory_region_add_subregion(get_system_memory(), mem_base, &d->pci_mmio);
- phb = PCI_HOST_BRIDGE(dev);
- phb->bus = pci_register_bus(DEVICE(phb), "pci",
- pci_apb_set_irq, pci_pbm_map_irq, d,
- &d->pci_mmio,
- get_system_io(),
- 0, 32, TYPE_PCI_BUS);
-
*pbm_irqs = d->pbm_irqs;
d->ivec_irqs = ivec_irqs;
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 8f91216..2c8acda 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -72,7 +72,6 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
GrackleState *d;
dev = qdev_create(NULL, TYPE_GRACKLE_PCI_HOST_BRIDGE);
- qdev_init_nofail(dev);
s = SYS_BUS_DEVICE(dev);
phb = PCI_HOST_BRIDGE(dev);
d = GRACKLE_PCI_HOST_BRIDGE(dev);
@@ -92,6 +91,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
0, 4, TYPE_PCI_BUS);
pci_create_simple(phb->bus, 0, "grackle");
+ qdev_init_nofail(dev);
sysbus_mmio_map(s, 0, base);
sysbus_mmio_map(s, 1, base + 0x00200000);
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 487e32e..5580293 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -247,6 +247,7 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp)
memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack);
/* TODO Remove once realize propagates to child devices. */
+ object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp);
object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp);
}
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 0792c45..467cbb9 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -455,6 +455,7 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp)
}
/* TODO Remove once realize propagates to child devices. */
+ object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp);
object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp);
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 149994b..728c6d4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2596,6 +2596,21 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
}
}
+MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
+{
+ MSIMessage msg;
+ if (msix_enabled(dev)) {
+ msg = msix_get_message(dev, vector);
+ } else if (msi_enabled(dev)) {
+ msg = msi_get_message(dev, vector);
+ } else {
+ /* Should never happen */
+ error_report("%s: unknown interrupt type", __func__);
+ abort();
+ }
+ return msg;
+}
+
static const TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,
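pci_get_msi_message() gives the reworked MSI routing code one place to read the message currently programmed for a vector, whether the device has MSI-X or plain MSI enabled. An illustrative caller (print_msi_message_sketch() is made up; PCIDevice, MSIMessage and pci_get_msi_message() are the interfaces shown above):

    #include "qemu/osdep.h"
    #include "hw/pci/pci.h"
    #include "hw/pci/msi.h"

    static void print_msi_message_sketch(PCIDevice *dev, int vector)
    {
        /* Works for MSI-X and plain MSI; the helper above aborts if neither
         * is enabled, so only call it for a vector that is actually in use. */
        MSIMessage msg = pci_get_msi_message(dev, vector);

        printf("vector %d -> address 0x%" PRIx64 ", data 0x%" PRIx32 "\n",
               vector, msg.address, msg.data);
    }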
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 722c93e..ce57ef6 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -663,22 +663,17 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
/* The device does not have anything to save beyond the virtio data.
* Request data is saved with callbacks from SCSI devices.
*/
-static void virtio_scsi_save(QEMUFile *f, void *opaque)
+static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
virtio_save(vdev, f);
}
-static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
- int ret;
- ret = virtio_load(vdev, f, version_id);
- if (ret) {
- return ret;
- }
- return 0;
+ return virtio_load(vdev, f, 1);
}
void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
@@ -824,8 +819,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
};
void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
- HandleOutput ctrl, HandleOutput evt,
- HandleOutput cmd)
+ VirtIOHandleOutput ctrl,
+ VirtIOHandleOutput evt,
+ VirtIOHandleOutput cmd)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev);
@@ -846,13 +842,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
- s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- ctrl);
- s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- evt);
+ s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl);
+ s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt);
for (i = 0; i < s->conf.num_queues; i++) {
- s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- cmd);
+ s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd);
}
if (s->conf.iothread) {
@@ -864,7 +857,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOSCSI *s = VIRTIO_SCSI(dev);
- static int virtio_scsi_id;
Error *err = NULL;
virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl,
@@ -887,9 +879,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
return;
}
}
-
- register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1,
- virtio_scsi_save, virtio_scsi_load, s);
}
static void virtio_scsi_instance_init(Object *obj)
@@ -913,9 +902,6 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp)
static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp)
{
- VirtIOSCSI *s = VIRTIO_SCSI(dev);
-
- unregister_savevm(dev, "virtio-scsi", s);
virtio_scsi_common_unrealize(dev, errp);
}
@@ -932,6 +918,8 @@ static Property virtio_scsi_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save);
+
static void virtio_scsi_common_class_init(ObjectClass *klass, void *data)
{
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
@@ -948,6 +936,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data)
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
dc->props = virtio_scsi_properties;
+ dc->vmsd = &vmstate_virtio_scsi;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_scsi_device_realize;
vdc->unrealize = virtio_scsi_device_unrealize;
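virtio-scsi now creates its control, event and command queues with virtio_add_queue_aio(), the new variant added at the end of this diff: such a queue's host notifier is serviced through aio_set_event_notifier() on the AioContext rather than the event_notifier_set_handler() path. A minimal sketch of a device picking one or the other (my_handle_output(), my_add_queues_sketch() and the use_aio flag are hypothetical; the virtio_add_queue*() prototypes are the post-series ones):

    #include "qemu/osdep.h"
    #include "hw/virtio/virtio.h"

    static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
    {
        /* Device-specific: pop requests from vq and process them. */
    }

    static void my_add_queues_sketch(VirtIODevice *vdev, bool use_aio)
    {
        if (use_aio) {
            /* Host notifier will be hooked up via aio_set_event_notifier(),
             * i.e. the AioContext-based path shown in the virtio.c hunk. */
            virtio_add_queue_aio(vdev, 128, my_handle_output);
        } else {
            /* Conventional queue serviced via event_notifier_set_handler(). */
            virtio_add_queue(vdev, 128, my_handle_output);
        }
    }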
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c8436a1..7bfa17c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
}
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
- MSIMessage *msg, bool msix)
+ int vector_n, bool msix)
{
int virq;
- if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) {
+ if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
}
@@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
return;
}
- virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev);
+ virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev);
if (virq < 0) {
event_notifier_cleanup(&vector->kvm_interrupt);
return;
@@ -458,6 +458,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
PCIDevice *pdev)
{
kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev);
+ kvm_irqchip_commit_routes(kvm_state);
}
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
@@ -495,7 +496,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vfio_update_kvm_msi_virq(vector, *msg, pdev);
}
} else {
- vfio_add_kvm_msi_virq(vdev, vector, msg, true);
+ vfio_add_kvm_msi_virq(vdev, vector, nr, true);
}
/*
@@ -639,7 +640,6 @@ retry:
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
- MSIMessage msg = msi_get_message(&vdev->pdev, i);
vector->vdev = vdev;
vector->virq = -1;
@@ -656,7 +656,7 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vfio_add_kvm_msi_virq(vdev, vector, &msg, false);
+ vfio_add_kvm_msi_virq(vdev, vector, i, false);
}
/* Set interrupt type prior to possible interrupts */
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1a22e6d..5af429a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -396,11 +396,6 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
trace_virtio_balloon_to_target(target, dev->num_pages);
}
-static void virtio_balloon_save(QEMUFile *f, void *opaque)
-{
- virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
{
VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -409,12 +404,9 @@ static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
qemu_put_be32(f, s->actual);
}
-static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size)
{
- if (version_id != 1)
- return -EINVAL;
-
- return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+ return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
}
static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -454,9 +446,6 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
reset_stats(s);
-
- register_savevm(dev, "virtio-balloon", -1, 1,
- virtio_balloon_save, virtio_balloon_load, s);
}
static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
@@ -466,7 +455,6 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
balloon_stats_destroy_timer(s);
qemu_remove_balloon_handler(s);
- unregister_savevm(dev, "virtio-balloon", s);
virtio_cleanup(vdev);
}
@@ -493,6 +481,8 @@ static void virtio_balloon_instance_init(Object *obj)
NULL, s, NULL);
}
+VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save);
+
static Property virtio_balloon_properties[] = {
DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
@@ -505,6 +495,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_balloon_properties;
+ dc->vmsd = &vmstate_virtio_balloon;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->realize = virtio_balloon_device_realize;
vdc->unrealize = virtio_balloon_device_unrealize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 2b34b43..f0677b7 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -699,14 +699,13 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev,
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
- unsigned int vector,
- MSIMessage msg)
+ unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
if (irqfd->users == 0) {
- ret = kvm_irqchip_add_msi_route(kvm_state, msg, &proxy->pci_dev);
+ ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
if (ret < 0) {
return ret;
}
@@ -757,7 +756,6 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
unsigned int vector;
int ret, queue_no;
- MSIMessage msg;
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
@@ -767,8 +765,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
- msg = msix_get_message(dev, vector);
- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+ ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
if (ret < 0) {
goto undo;
}
@@ -845,6 +842,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
if (ret < 0) {
return ret;
}
+ kvm_irqchip_commit_routes(kvm_state);
}
}
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 6b991a7..cd8ca10 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -120,22 +120,12 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
return f;
}
-static void virtio_rng_save(QEMUFile *f, void *opaque)
-{
- VirtIODevice *vdev = opaque;
-
- virtio_save(vdev, f);
-}
-
-static int virtio_rng_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_rng_load(QEMUFile *f, void *opaque, size_t size)
{
VirtIORNG *vrng = opaque;
int ret;
- if (version_id != 1) {
- return -EINVAL;
- }
- ret = virtio_load(VIRTIO_DEVICE(vrng), f, version_id);
+ ret = virtio_load(VIRTIO_DEVICE(vrng), f, 1);
if (ret != 0) {
return ret;
}
@@ -214,8 +204,6 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
check_rate_limit, vrng);
vrng->activate_timer = true;
- register_savevm(dev, "virtio-rng", -1, 1, virtio_rng_save,
- virtio_rng_load, vrng);
}
static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
@@ -225,10 +213,11 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
timer_del(vrng->rate_limit_timer);
timer_free(vrng->rate_limit_timer);
- unregister_savevm(dev, "virtio-rng", vrng);
virtio_cleanup(vdev);
}
+VMSTATE_VIRTIO_DEVICE(rng, 1, virtio_rng_load, virtio_vmstate_save);
+
static Property virtio_rng_properties[] = {
/* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s. If
* you have an entropy source capable of generating more entropy than this
@@ -246,6 +235,7 @@ static void virtio_rng_class_init(ObjectClass *klass, void *data)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
dc->props = virtio_rng_properties;
+ dc->vmsd = &vmstate_virtio_rng;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->realize = virtio_rng_device_realize;
vdc->unrealize = virtio_rng_device_unrealize;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 18153d5..752b271 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -95,8 +95,9 @@ struct VirtQueue
int inuse;
uint16_t vector;
- void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
- void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq);
+ VirtIOHandleOutput handle_output;
+ VirtIOHandleOutput handle_aio_output;
+ bool use_aio;
VirtIODevice *vdev;
EventNotifier guest_notifier;
EventNotifier host_notifier;
@@ -1130,8 +1131,9 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
}
}
-VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
- void (*handle_output)(VirtIODevice *, VirtQueue *))
+static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
+ VirtIOHandleOutput handle_output,
+ bool use_aio)
{
int i;
@@ -1148,10 +1150,28 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
vdev->vq[i].handle_output = handle_output;
vdev->vq[i].handle_aio_output = NULL;
+ vdev->vq[i].use_aio = use_aio;
return &vdev->vq[i];
}
+/* Add a virt queue and mark AIO.
+ * An AIO queue will use the AioContext based event interface instead of the
+ * default IOHandler and EventNotifier interface.
+ */
+VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
+ VirtIOHandleOutput handle_output)
+{
+ return virtio_add_queue_internal(vdev, queue_size, handle_output, true);
+}
+
+/* Add a normal virt queue (as opposed to the AIO version above). */
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+ VirtIOHandleOutput handle_output)
+{
+ return virtio_add_queue_internal(vdev, queue_size, handle_output, false);
+}
+
void virtio_del_queue(VirtIODevice *vdev, int n)
{
if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
@@ -1444,6 +1464,12 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
+/* A wrapper for use as a VMState .put function */
+void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size)
+{
+ virtio_save(VIRTIO_DEVICE(opaque), f);
+}
+
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
@@ -1804,8 +1830,7 @@ static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
}
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
- void (*handle_output)(VirtIODevice *,
- VirtQueue *))
+ VirtIOHandleOutput handle_output)
{
if (handle_output) {
vq->handle_aio_output = handle_output;
@@ -1831,11 +1856,21 @@ static void virtio_queue_host_notifier_read(EventNotifier *n)
void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
bool set_handler)
{
+ AioContext *ctx = qemu_get_aio_context();
if (assign && set_handler) {
- event_notifier_set_handler(&vq->host_notifier, true,
+ if (vq->use_aio) {
+ aio_set_event_notifier(ctx, &vq->host_notifier, true,
virtio_queue_host_notifier_read);
+ } else {
+ event_notifier_set_handler(&vq->host_notifier, true,
+ virtio_queue_host_notifier_read);
+ }
} else {
- event_notifier_set_handler(&vq->host_notifier, true, NULL);
+ if (vq->use_aio) {
+ aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
+ } else {
+ event_notifier_set_handler(&vq->host_notifier, true, NULL);
+ }
}
if (!assign) {
        /* Test and clear notifier after disabling event,