diff options
Diffstat (limited to 'hw/virtio')
-rw-r--r-- | hw/virtio/Kconfig | 5 | ||||
-rw-r--r-- | hw/virtio/meson.build | 25 | ||||
-rw-r--r-- | hw/virtio/trace-events | 1 | ||||
-rw-r--r-- | hw/virtio/vdpa-dev.c | 7 | ||||
-rw-r--r-- | hw/virtio/vhost-backend.c | 62 | ||||
-rw-r--r-- | hw/virtio/vhost-user-test-device-pci.c (renamed from hw/virtio/vhost-user-device-pci.c) | 17 | ||||
-rw-r--r-- | hw/virtio/vhost-user-test-device.c (renamed from hw/virtio/vhost-user-device.c) | 9 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 5 | ||||
-rw-r--r-- | hw/virtio/vhost-vdpa.c | 116 | ||||
-rw-r--r-- | hw/virtio/vhost-vsock.c | 8 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 181 | ||||
-rw-r--r-- | hw/virtio/virtio-balloon.c | 1 | ||||
-rw-r--r-- | hw/virtio/virtio-bus.c | 11 | ||||
-rw-r--r-- | hw/virtio/virtio-config-io.c | 1 | ||||
-rw-r--r-- | hw/virtio/virtio-hmp-cmds.c | 3 | ||||
-rw-r--r-- | hw/virtio/virtio-mem.c | 85 | ||||
-rw-r--r-- | hw/virtio/virtio-mmio.c | 5 | ||||
-rw-r--r-- | hw/virtio/virtio-pci.c | 120 | ||||
-rw-r--r-- | hw/virtio/virtio-qmp.c | 91 | ||||
-rw-r--r-- | hw/virtio/virtio-qmp.h | 3 | ||||
-rw-r--r-- | hw/virtio/virtio.c | 230 |
21 files changed, 635 insertions, 351 deletions
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 7648a2d..10f5c53 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -126,3 +126,8 @@ config VHOST_USER_SCMI bool default y depends on VIRTIO && VHOST_USER && ARM + +config VHOST_USER_TEST + bool + default y + depends on VIRTIO && VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 164f6fd..48b9fed 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,6 +1,7 @@ system_virtio_ss = ss.source_set() system_virtio_ss.add(files('virtio-bus.c')) system_virtio_ss.add(files('iothread-vq-mapping.c')) +system_virtio_ss.add(files('virtio-config-io.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) @@ -10,18 +11,18 @@ system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c') specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) -specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) +specific_virtio_ss.add(files('virtio-qmp.c')) if have_vhost system_virtio_ss.add(files('vhost.c')) - specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) + system_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) system_virtio_ss.add(files('vhost-user-base.c')) # MMIO Stubs - system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_TEST', if_true: files('vhost-user-test-device.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) @@ -29,7 +30,8 @@ if have_vhost system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) # PCI Stubs - system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_TEST'], + if_true: files('vhost-user-test-device-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], @@ -43,22 +45,22 @@ if have_vhost endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) - specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) + system_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else system_virtio_ss.add(files('vhost-stub.c')) endif +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor]) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) -specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: files('virtio-nsm.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('cbor-helpers.c'), libcbor]) +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -67,6 +69,7 @@ virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk- virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c')) @@ -85,7 +88,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) -specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) +system_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 76f0d45..658cc36 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -75,7 +75,6 @@ virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u" virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p" virtio_notify_irqfd_deferred_fn(void *vdev, void *vq) "vdev %p vq %p" -virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p" virtio_notify(void *vdev, void *vq) "vdev %p vq %p" virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index d1da40a..4a7b970 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -338,6 +338,12 @@ static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) return 0; } +static struct vhost_dev *vhost_vdpa_device_get_vhost(VirtIODevice *vdev) +{ + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + return &s->dev; +} + static const Property vhost_vdpa_device_properties[] = { DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev), DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), @@ -369,6 +375,7 @@ static void vhost_vdpa_device_class_init(ObjectClass *klass, const void *data) vdc->set_config = vhost_vdpa_device_set_config; vdc->get_features = vhost_vdpa_device_get_features; vdc->set_status = vhost_vdpa_device_set_status; + vdc->get_vhost = vhost_vdpa_device_get_vhost; } static void vhost_vdpa_device_instance_init(Object *obj) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 833804d..4367db0 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -20,6 +20,11 @@ #include <linux/vhost.h> #include <sys/ioctl.h> +struct vhost_features { + uint64_t count; + uint64_t features[VIRTIO_FEATURES_NU64S]; +}; + static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, void *arg) { @@ -182,12 +187,6 @@ static int vhost_kernel_get_vring_worker(struct vhost_dev *dev, return vhost_kernel_call(dev, VHOST_GET_VRING_WORKER, worker); } -static int vhost_kernel_set_features(struct vhost_dev *dev, - uint64_t features) -{ - return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features); -} - static int vhost_kernel_set_backend_cap(struct vhost_dev *dev) { uint64_t features; @@ -210,10 +209,51 @@ static int vhost_kernel_set_backend_cap(struct vhost_dev *dev) return 0; } -static int vhost_kernel_get_features(struct vhost_dev *dev, - uint64_t *features) +static int vhost_kernel_set_features(struct vhost_dev *dev, + const uint64_t *features) { - return vhost_kernel_call(dev, VHOST_GET_FEATURES, features); + struct vhost_features farray; + bool extended_in_use; + int r; + + farray.count = VIRTIO_FEATURES_NU64S; + virtio_features_copy(farray.features, features); + extended_in_use = virtio_features_use_ex(farray.features); + + /* + * Can't check for ENOTTY: for unknown ioctls the kernel interprets + * the argument as a virtio queue id and most likely errors out validating + * such id, instead of reporting an unknown operation. + */ + r = vhost_kernel_call(dev, VHOST_SET_FEATURES_ARRAY, &farray); + if (!r) { + return 0; + } + + if (extended_in_use) { + error_report("Trying to set extended features without kernel support"); + return -EINVAL; + } + return vhost_kernel_call(dev, VHOST_SET_FEATURES, &farray.features[0]); +} + +static int vhost_kernel_get_features(struct vhost_dev *dev, uint64_t *features) +{ + struct vhost_features farray; + int r; + + farray.count = VIRTIO_FEATURES_NU64S; + r = vhost_kernel_call(dev, VHOST_GET_FEATURES_ARRAY, &farray); + if (r) { + memset(&farray, 0, sizeof(farray)); + r = vhost_kernel_call(dev, VHOST_GET_FEATURES, &farray.features[0]); + } + if (r) { + return r; + } + + virtio_features_copy(features, farray.features); + return 0; } static int vhost_kernel_set_owner(struct vhost_dev *dev) @@ -341,8 +381,8 @@ const VhostOps kernel_ops = { .vhost_attach_vring_worker = vhost_kernel_attach_vring_worker, .vhost_new_worker = vhost_kernel_new_worker, .vhost_free_worker = vhost_kernel_free_worker, - .vhost_set_features = vhost_kernel_set_features, - .vhost_get_features = vhost_kernel_get_features, + .vhost_set_features_ex = vhost_kernel_set_features, + .vhost_get_features_ex = vhost_kernel_get_features, .vhost_set_backend_cap = vhost_kernel_set_backend_cap, .vhost_set_owner = vhost_kernel_set_owner, .vhost_get_vq_index = vhost_kernel_get_vq_index, diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-test-device-pci.c index f10bac8..b4ed0ef 100644 --- a/hw/virtio/vhost-user-device-pci.c +++ b/hw/virtio/vhost-user-test-device-pci.c @@ -18,13 +18,13 @@ struct VHostUserDevicePCI { VHostUserBase vub; }; -#define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base" +#define TYPE_VHOST_USER_TEST_DEVICE_PCI "vhost-user-test-device-pci-base" -OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI) +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_TEST_DEVICE_PCI) static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { - VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev); + VHostUserDevicePCI *dev = VHOST_USER_TEST_DEVICE_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vub); vpci_dev->nvectors = 1; @@ -38,9 +38,6 @@ static void vhost_user_device_pci_class_init(ObjectClass *klass, VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); - /* Reason: stop users confusing themselves */ - dc->user_creatable = false; - k->realize = vhost_user_device_pci_realize; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; @@ -51,15 +48,15 @@ static void vhost_user_device_pci_class_init(ObjectClass *klass, static void vhost_user_device_pci_instance_init(Object *obj) { - VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(obj); + VHostUserDevicePCI *dev = VHOST_USER_TEST_DEVICE_PCI(obj); virtio_instance_init_common(obj, &dev->vub, sizeof(dev->vub), - TYPE_VHOST_USER_DEVICE); + TYPE_VHOST_USER_TEST_DEVICE); } static const VirtioPCIDeviceTypeInfo vhost_user_device_pci_info = { - .base_name = TYPE_VHOST_USER_DEVICE_PCI, - .non_transitional_name = "vhost-user-device-pci", + .base_name = TYPE_VHOST_USER_TEST_DEVICE_PCI, + .non_transitional_name = "vhost-user-test-device-pci", .instance_size = sizeof(VHostUserDevicePCI), .instance_init = vhost_user_device_pci_instance_init, .class_init = vhost_user_device_pci_class_init, diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-test-device.c index 3939bdf..1b98ea3 100644 --- a/hw/virtio/vhost-user-device.c +++ b/hw/virtio/vhost-user-test-device.c @@ -1,5 +1,5 @@ /* - * Generic vhost-user-device implementation for any vhost-user-backend + * Generic vhost-user-test-device implementation for any vhost-user-backend * * This is a concrete implementation of vhost-user-base which can be * configured via properties. It is useful for development and @@ -25,7 +25,7 @@ */ static const VMStateDescription vud_vmstate = { - .name = "vhost-user-device", + .name = "vhost-user-test-device", .unmigratable = 1, }; @@ -41,16 +41,13 @@ static void vud_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - /* Reason: stop inexperienced users confusing themselves */ - dc->user_creatable = false; - device_class_set_props(dc, vud_properties); dc->vmsd = &vud_vmstate; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } static const TypeInfo vud_info = { - .name = TYPE_VHOST_USER_DEVICE, + .name = TYPE_VHOST_USER_TEST_DEVICE, .parent = TYPE_VHOST_USER_BASE, .class_init = vud_class_init, }; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 1e1d6b0..36c9c2e 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2039,7 +2039,10 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) error_setg(errp, "%s: Failed to get ufd", __func__); return -EIO; } - qemu_socket_set_nonblock(ufd); + if (!qemu_set_blocking(ufd, false, errp)) { + close(ufd); + return -EINVAL; + } /* register ufd with userfault thread */ u->postcopy_fd.fd = ufd; diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 1ab2c11..7061b6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) int ret; Int128 llend; Error *local_err = NULL; + MemoryRegion *mr; + hwaddr xlat; if (iotlb->target_as != &address_space_memory) { error_report("Wrong target AS \"%s\", only system memory is allowed", @@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL, - &local_err)) { + mr = memory_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); return; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { @@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) v->shadow_vqs = g_steal_pointer(&shadow_vqs); } +static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << VHOST_BACKEND_F_SUSPEND; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { + return -EFAULT; + } + + features &= f; + + if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } + } + + dev->backend_cap = features; + v->shared->backend_cap = features; + + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v = opaque; @@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->shared->listener = vhost_vdpa_memory_listener; + + ret = vhost_vdpa_set_backend_cap(dev); + if (unlikely(ret != 0)) { + return ret; + } + vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); + v->shared->listener = vhost_vdpa_memory_listener; return 0; } @@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); } -static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) -{ - struct vhost_vdpa *v = dev->opaque; - - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | - 0x1ULL << VHOST_BACKEND_F_SUSPEND; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return -EFAULT; - } - - features &= f; - - if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; - } - } - - dev->backend_cap = features; - v->shared->backend_cap = features; - - return 0; -} - static int vhost_vdpa_get_device_id(struct vhost_dev *dev, uint32_t *device_id) { @@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev); + if (ret) { + return ret; + } + + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; v->suspended = false; - return ret; + return 0; } static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) @@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + if (v->shared->listener_registered && + dev->vdev->dma_as != v->shared->listener.address_space) { + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; + } + if (!v->shared->listener_registered) { + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + v->shared->listener_registered = true; + } return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static void vhost_vdpa_reset_status(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; - if (!vhost_vdpa_last_dev(dev)) { return; } @@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + int r; + struct vhost_vdpa *v; + if (!vhost_vdpa_first_dev(dev)) { return 0; } trace_vhost_vdpa_set_owner(dev); - return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + if (unlikely(r < 0)) { + return r; + } + + /* + * Being optimistic and listening address space memory. If the device + * uses vIOMMU, it is changed at vhost_vdpa_dev_start. + */ + v = dev->opaque; + memory_listener_register(&v->shared->listener, &address_space_memory); + v->shared->listener_registered = true; + return 0; } static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, @@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = { .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, .vhost_set_vring_call = vhost_vdpa_set_vring_call, .vhost_get_features = vhost_vdpa_get_features, - .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, .vhost_set_owner = vhost_vdpa_set_owner, .vhost_set_vring_endian = NULL, .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 6e40888..107d88b 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -147,9 +147,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) return; } - if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { - error_setg_errno(errp, errno, - "vhost-vsock: unable to set non-blocking mode"); + if (!qemu_set_blocking(vhostfd, false, errp)) { return; } } else { @@ -160,9 +158,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) return; } - if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { - error_setg_errno(errp, errno, - "Failed to set FD nonblocking"); + if (!qemu_set_blocking(vhostfd, false, errp)) { return; } } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index fc43853..266a115 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,7 @@ #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "system/dma.h" +#include "system/memory.h" #include "trace.h" /* enabled until disconnected backend stabilizes */ @@ -47,12 +48,6 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; -/* Memslots used by backends that support private memslots (without an fd). */ -static unsigned int used_memslots; - -/* Memslots used by backends that only support shared memslots (with an fd). */ -static unsigned int used_shared_memslots; - static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); @@ -74,15 +69,15 @@ unsigned int vhost_get_free_memslots(void) QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - unsigned int cur_free; + unsigned int cur_free = r - hdev->mem->nregions; - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - cur_free = r - used_shared_memslots; + if (unlikely(r < hdev->mem->nregions)) { + warn_report_once("used (%u) vhost backend memory slots exceed" + " the device limit (%u).", hdev->mem->nregions, r); + free = 0; } else { - cur_free = r - used_memslots; + free = MIN(free, cur_free); } - free = MIN(free, cur_free); } return free; } @@ -461,7 +456,8 @@ static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, hwaddr *plen, bool is_write) { if (!vhost_dev_has_iommu(dev)) { - return cpu_physical_memory_map(addr, plen, is_write); + return address_space_map(dev->vdev->dma_as, addr, plen, is_write, + MEMTXATTRS_UNSPECIFIED); } else { return (void *)(uintptr_t)addr; } @@ -472,7 +468,8 @@ static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer, hwaddr access_len) { if (!vhost_dev_has_iommu(dev)) { - cpu_physical_memory_unmap(buffer, len, is_write, access_len); + address_space_unmap(dev->vdev->dma_as, buffer, len, is_write, + access_len); } } @@ -666,13 +663,6 @@ static void vhost_commit(MemoryListener *listener) dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - if (dev->vhost_ops->vhost_backend_no_private_memslots && - dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { - used_shared_memslots = dev->mem->nregions; - } else { - used_memslots = dev->mem->nregions; - } - for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -985,20 +975,34 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) { - uint64_t features = dev->acked_features; + uint64_t features[VIRTIO_FEATURES_NU64S]; int r; + + virtio_features_copy(features, dev->acked_features_ex); if (enable_log) { - features |= 0x1ULL << VHOST_F_LOG_ALL; + virtio_add_feature_ex(features, VHOST_F_LOG_ALL); } if (!vhost_dev_has_iommu(dev)) { - features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM); + virtio_clear_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM); } if (dev->vhost_ops->vhost_force_iommu) { if (dev->vhost_ops->vhost_force_iommu(dev) == true) { - features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM; + virtio_add_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM); } } - r = dev->vhost_ops->vhost_set_features(dev, features); + + if (virtio_features_use_ex(features) && + !dev->vhost_ops->vhost_set_features_ex) { + r = -EINVAL; + VHOST_OPS_DEBUG(r, "extended features without device support"); + goto out; + } + + if (dev->vhost_ops->vhost_set_features_ex) { + r = dev->vhost_ops->vhost_set_features_ex(dev, features); + } else { + r = dev->vhost_ops->vhost_set_features(dev, features[0]); + } if (r < 0) { VHOST_OPS_DEBUG(r, "vhost_set_features failed"); goto out; @@ -1123,7 +1127,8 @@ static bool vhost_log_global_start(MemoryListener *listener, Error **errp) r = vhost_migration_log(listener, true); if (r < 0) { - abort(); + error_setg_errno(errp, -r, "vhost: Failed to start logging"); + return false; } return true; } @@ -1134,7 +1139,8 @@ static void vhost_log_global_stop(MemoryListener *listener) r = vhost_migration_log(listener, false); if (r < 0) { - abort(); + /* Not fatal, so report it, but take no further action */ + warn_report("vhost: Failed to stop logging"); } } @@ -1367,25 +1373,30 @@ fail_alloc_desc: return r; } -int vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +static int do_vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx, bool force) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; - int r; + int r = 0; if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ return 0; } - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); - if (r < 0) { - VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + if (!force) { + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); + if (r < 0) { + VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + } + } + + if (r < 0 || force) { /* Connection to the backend is broken, so let's sync internal * last avail idx to the device used idx. */ @@ -1414,6 +1425,14 @@ int vhost_virtqueue_stop(struct vhost_dev *dev, return r; } +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) +{ + return do_vhost_virtqueue_stop(dev, vdev, vq, idx, false); +} + static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, int n, uint32_t timeout) { @@ -1506,12 +1525,27 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) } } +static int vhost_dev_get_features(struct vhost_dev *hdev, + uint64_t *features) +{ + uint64_t features64; + int r; + + if (hdev->vhost_ops->vhost_get_features_ex) { + return hdev->vhost_ops->vhost_get_features_ex(hdev, features); + } + + r = hdev->vhost_ops->vhost_get_features(hdev, &features64); + virtio_features_from_u64(features, features64); + return r; +} + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp) { + uint64_t features[VIRTIO_FEATURES_NU64S]; unsigned int used, reserved, limit; - uint64_t features; int i, r, n_initialized_vqs = 0; hdev->vdev = NULL; @@ -1531,7 +1565,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, goto fail; } - r = hdev->vhost_ops->vhost_get_features(hdev, &features); + r = vhost_dev_get_features(hdev, features); if (r < 0) { error_setg_errno(errp, -r, "vhost_get_features failed"); goto fail; @@ -1569,7 +1603,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } } - hdev->features = features; + virtio_features_copy(hdev->features_ex, features); hdev->memory_listener = (MemoryListener) { .name = "vhost", @@ -1592,7 +1626,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, }; if (hdev->migration_blocker == NULL) { - if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { + if (!virtio_has_feature_ex(hdev->features_ex, VHOST_F_LOG_ALL)) { error_setg(&hdev->migration_blocker, "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) { @@ -1619,15 +1653,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); /* - * The listener we registered properly updated the corresponding counter. - * So we can trust that these values are accurate. + * The listener we registered properly setup the number of required + * memslots in vhost_commit(). */ - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - used = used_shared_memslots; - } else { - used = used_memslots; - } + used = hdev->mem->nregions; + /* * We assume that all reserved memslots actually require a real memslot * in our vhost backend. This might not be true, for example, if the @@ -1819,7 +1849,7 @@ void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) int r; EventNotifier *notifier = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; - EventNotifier *config_notifier = &vdev->config_notifier; + EventNotifier *config_notifier = virtio_config_get_guest_notifier(vdev); assert(hdev->vhost_ops); if ((hdev->started == false) || @@ -1850,39 +1880,40 @@ static void vhost_stop_config_intr(struct vhost_dev *dev) static void vhost_start_config_intr(struct vhost_dev *dev) { int r; + EventNotifier *config_notifier = + virtio_config_get_guest_notifier(dev->vdev); assert(dev->vhost_ops); - int fd = event_notifier_get_fd(&dev->vdev->config_notifier); + int fd = event_notifier_get_fd(config_notifier); if (dev->vhost_ops->vhost_set_config_call) { r = dev->vhost_ops->vhost_set_config_call(dev, fd); if (!r) { - event_notifier_set(&dev->vdev->config_notifier); + event_notifier_set(config_notifier); } } } -uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, - uint64_t features) +void vhost_get_features_ex(struct vhost_dev *hdev, + const int *feature_bits, + uint64_t *features) { const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { - uint64_t bit_mask = (1ULL << *bit); - if (!(hdev->features & bit_mask)) { - features &= ~bit_mask; + if (!virtio_has_feature_ex(hdev->features_ex, *bit)) { + virtio_clear_feature_ex(features, *bit); } bit++; } - return features; } -void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, - uint64_t features) +void vhost_ack_features_ex(struct vhost_dev *hdev, const int *feature_bits, + const uint64_t *features) { const int *bit = feature_bits; while (*bit != VHOST_INVALID_FEATURE_BIT) { - uint64_t bit_mask = (1ULL << *bit); - if (features & bit_mask) { - hdev->acked_features |= bit_mask; + if (virtio_has_feature_ex(features, *bit)) { + virtio_add_feature_ex(hdev->acked_features_ex, *bit); } bit++; } @@ -2136,16 +2167,18 @@ fail_features: } /* Host notifiers must be enabled at this point. */ -int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +static int do_vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings, bool force) { int i; int rc = 0; + EventNotifier *config_notifier = virtio_config_get_guest_notifier(vdev); /* should only be called after backend is connected */ assert(hdev->vhost_ops); event_notifier_test_and_clear( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); - event_notifier_test_and_clear(&vdev->config_notifier); + event_notifier_test_and_clear(config_notifier); event_notifier_cleanup( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); @@ -2158,10 +2191,11 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - rc |= vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= do_vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i, + force); } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2181,6 +2215,17 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) return rc; } +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, false); +} + +int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, true); +} + int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file) { diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index db787d0..02cdd80 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -23,6 +23,7 @@ #include "hw/qdev-properties.h" #include "hw/boards.h" #include "system/balloon.h" +#include "system/ramblock.h" #include "hw/virtio/virtio-balloon.h" #include "system/address-spaces.h" #include "qapi/error.h" diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 11adfbf..cef944e 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -62,9 +62,14 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) } /* Get the features of the plugged device. */ - assert(vdc->get_features != NULL); - vdev->host_features = vdc->get_features(vdev, vdev->host_features, - &local_err); + if (vdc->get_features_ex) { + vdc->get_features_ex(vdev, vdev->host_features_ex, &local_err); + } else { + assert(vdc->get_features != NULL); + virtio_features_from_u64(vdev->host_features_ex, + vdc->get_features(vdev, vdev->host_features, + &local_err)); + } if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/virtio/virtio-config-io.c b/hw/virtio/virtio-config-io.c index ad78e0b..f58d90b 100644 --- a/hw/virtio/virtio-config-io.c +++ b/hw/virtio/virtio-config-io.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "hw/virtio/virtio.h" -#include "cpu.h" uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) { diff --git a/hw/virtio/virtio-hmp-cmds.c b/hw/virtio/virtio-hmp-cmds.c index 7d8677b..1daae48 100644 --- a/hw/virtio/virtio-hmp-cmds.c +++ b/hw/virtio/virtio-hmp-cmds.c @@ -74,7 +74,8 @@ static void hmp_virtio_dump_features(Monitor *mon, } if (features->has_unknown_dev_features) { - monitor_printf(mon, " unknown-features(0x%016"PRIx64")\n", + monitor_printf(mon, " unknown-features(0x%016"PRIx64"%016"PRIx64")\n", + features->unknown_dev_features2, features->unknown_dev_features); } } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index a3d1a67..15ba679 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -17,6 +17,7 @@ #include "qemu/units.h" #include "system/numa.h" #include "system/system.h" +#include "system/ramblock.h" #include "system/reset.h" #include "system/runstate.h" #include "hw/virtio/virtio.h" @@ -24,7 +25,6 @@ #include "hw/virtio/virtio-mem.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "system/ram_addr.h" #include "migration/misc.h" #include "hw/boards.h" #include "hw/qdev-properties.h" @@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, return ret; } -/* - * Adjust the memory section to cover the intersection with the given range. - * - * Returns false if the intersection is empty, otherwise returns true. - */ -static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, - uint64_t offset, uint64_t size) -{ - uint64_t start = MAX(s->offset_within_region, offset); - uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), - offset + size); - - if (end <= start) { - return false; - } - - s->offset_within_address_space += start - s->offset_within_region; - s->offset_within_region = start; - s->size = int128_make64(end - start); - return true; -} - typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, @@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl->notify_discard(rdl, &tmp); @@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } ret = rdl->notify_populate(rdl, &tmp); @@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, if (rdl2 == rdl) { break; } - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl2->notify_discard(rdl2, &tmp); @@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) } /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; + } + + /* * We don't know at this point whether shared RAM is migrated using * QEMU or migrated using the file content. "x-ignore-shared" will be * configured after realizing the device. So in case we have an @@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); return; } @@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj); vmem->system_reset->vmem = vmem; qemu_register_resettable(obj); - - /* - * Set ourselves as RamDiscardManager before the plug handler maps the - * memory region and exposes it via an address space. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, - RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - /* - * The unplug handler unmapped the memory region, it cannot be - * found via an address space anymore. Unset ourselves. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); - qemu_unregister_resettable(OBJECT(vmem->system_reset)); object_unref(OBJECT(vmem->system_reset)); @@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); } @@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, } struct VirtIOMEMReplayData { - void *fn; + ReplayRamDiscardState fn; void *opaque; }; @@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) { struct VirtIOMEMReplayData *data = arg; - return ((ReplayRamPopulate)data->fn)(s, data->opaque); + return data->fn(s, data->opaque); } static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *s, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); @@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, { struct VirtIOMEMReplayData *data = arg; - ((ReplayRamDiscard)data->fn)(s, data->opaque); - return 0; + return data->fn(s, data->opaque); } -static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *s, - ReplayRamDiscard replay_fn, - void *opaque) +static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscardState replay_fn, + void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); struct VirtIOMEMReplayData data = { @@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, }; g_assert(s->mr == &vmem->memdev->mr); - virtio_mem_for_each_unplugged_section(vmem, s, &data, - virtio_mem_rdm_replay_discarded_cb); + return virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); } static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 532c671..fb58c36 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -34,6 +34,7 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "trace.h" +#include "qapi/error.h" static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) { @@ -612,14 +613,14 @@ static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f) { VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); - vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL); + vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL, &error_fatal); } static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f) { VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); - return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1); + return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1, &error_fatal); } static bool virtio_mmio_has_extra_state(DeviceState *opaque) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9b48aa8..937e22f 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -30,9 +30,11 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qemu/bswap.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/loader.h" +#include "system/accel-irq.h" #include "system/kvm.h" #include "hw/virtio/virtio-pci.h" #include "qemu/range.h" @@ -108,6 +110,29 @@ static const VMStateDescription vmstate_virtio_pci_modern_queue_state = { } }; +static bool virtio_pci_modern_state_features128_needed(void *opaque) +{ + VirtIOPCIProxy *proxy = opaque; + uint32_t features = 0; + int i; + + for (i = 2; i < ARRAY_SIZE(proxy->guest_features); ++i) { + features |= proxy->guest_features[i]; + } + return features; +} + +static const VMStateDescription vmstate_virtio_pci_modern_state_features128 = { + .name = "virtio_pci/modern_state/features128", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_pci_modern_state_features128_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 2, 2), + VMSTATE_END_OF_LIST() + } +}; + static bool virtio_pci_modern_state_needed(void *opaque) { VirtIOPCIProxy *proxy = opaque; @@ -115,6 +140,12 @@ static bool virtio_pci_modern_state_needed(void *opaque) return virtio_pci_modern(proxy); } +/* + * Avoid silently breaking migration should the feature space increase + * even more in the (far away) future + */ +QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU32S != 4); + static const VMStateDescription vmstate_virtio_pci_modern_state_sub = { .name = "virtio_pci/modern_state", .version_id = 1, @@ -123,11 +154,15 @@ static const VMStateDescription vmstate_virtio_pci_modern_state_sub = { .fields = (const VMStateField[]) { VMSTATE_UINT32(dfselect, VirtIOPCIProxy), VMSTATE_UINT32(gfselect, VirtIOPCIProxy), - VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2), + VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 0, 2), VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0, vmstate_virtio_pci_modern_queue_state, VirtIOPCIQueue), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_virtio_pci_modern_state_features128, + NULL } }; @@ -146,23 +181,21 @@ static const VMStateDescription vmstate_virtio_pci = { static bool virtio_pci_has_extra_state(DeviceState *d) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; + return true; } static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL); + vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL, &error_fatal); } static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1); + return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1, &error_fatal); } static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f) @@ -826,11 +859,11 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, if (irqfd->users == 0) { KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); - ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); + ret = accel_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_route_changes(&c); + accel_irqchip_commit_route_changes(&c); irqfd->virq = ret; } irqfd->users++; @@ -842,7 +875,7 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, { VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; if (--irqfd->users == 0) { - kvm_irqchip_release_virq(kvm_state, irqfd->virq); + accel_irqchip_release_virq(irqfd->virq); } } @@ -851,7 +884,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, unsigned int vector) { VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; - return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); + return accel_irqchip_add_irqfd_notifier_gsi(n, NULL, irqfd->virq); } static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, @@ -861,7 +894,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; - ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); + ret = accel_irqchip_remove_irqfd_notifier_gsi(n, irqfd->virq); assert(ret == 0); } static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, @@ -996,12 +1029,12 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, if (proxy->vector_irqfd) { irqfd = &proxy->vector_irqfd[vector]; if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { - ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg, - &proxy->pci_dev); + ret = accel_irqchip_update_msi_route(irqfd->virq, msg, + &proxy->pci_dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_routes(kvm_state); + accel_irqchip_commit_routes(); } } @@ -1215,7 +1248,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) @@ -1225,7 +1263,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); int r, n; bool with_irqfd = msix_enabled(&proxy->pci_dev) && - kvm_msi_via_irqfd_enabled(); + accel_msi_via_irqfd_enabled() ; nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX); @@ -1429,7 +1467,7 @@ static void virtio_pci_set_vector(VirtIODevice *vdev, uint16_t new_vector) { bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && - msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); + msix_enabled(&proxy->pci_dev) && accel_msi_via_irqfd_enabled(); if (new_vector == old_vector) { return; @@ -1473,6 +1511,19 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, return virtio_pci_add_mem_cap(proxy, &cap.cap); } +static int virtio_pci_select_max(const VirtIODevice *vdev) +{ + int i; + + for (i = VIRTIO_FEATURES_NU64S - 1; i > 0; i--) { + if (vdev->host_features_ex[i]) { + return (i + 1) * 2; + } + } + + return 2; +} + static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, unsigned size) { @@ -1490,18 +1541,21 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, val = proxy->dfselect; break; case VIRTIO_PCI_COMMON_DF: - if (proxy->dfselect <= 1) { + if (proxy->dfselect < virtio_pci_select_max(vdev)) { VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); - val = (vdev->host_features & ~vdc->legacy_features) >> - (32 * proxy->dfselect); + val = vdev->host_features_ex[proxy->dfselect >> 1] >> + (32 * (proxy->dfselect & 1)); + if (proxy->dfselect <= 1) { + val &= (~vdc->legacy_features) >> (32 * proxy->dfselect); + } } break; case VIRTIO_PCI_COMMON_GFSELECT: val = proxy->gfselect; break; case VIRTIO_PCI_COMMON_GF: - if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { + if (proxy->gfselect < virtio_pci_select_max(vdev)) { val = proxy->guest_features[proxy->gfselect]; } break; @@ -1584,11 +1638,18 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, proxy->gfselect = val; break; case VIRTIO_PCI_COMMON_GF: - if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { + if (proxy->gfselect < virtio_pci_select_max(vdev)) { + uint64_t features[VIRTIO_FEATURES_NU64S]; + int i; + proxy->guest_features[proxy->gfselect] = val; - virtio_set_features(vdev, - (((uint64_t)proxy->guest_features[1]) << 32) | - proxy->guest_features[0]); + virtio_features_clear(features); + for (i = 0; i < ARRAY_SIZE(proxy->guest_features); ++i) { + uint64_t cur = proxy->guest_features[i]; + + features[i >> 1] |= cur << ((i & 1) * 32); + } + virtio_set_features_ex(vdev, features); } break; case VIRTIO_PCI_COMMON_MSIX: @@ -2307,6 +2368,8 @@ static void virtio_pci_reset(DeviceState *qdev) virtio_bus_reset(bus); msix_unuse_all_vectors(&proxy->pci_dev); + memset(proxy->guest_features, 0, sizeof(proxy->guest_features)); + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { proxy->vqs[i].enabled = 0; proxy->vqs[i].reset = 0; @@ -2363,12 +2426,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) static const Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, @@ -2397,8 +2456,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); PCIDevice *pci_dev = &proxy->pci_dev; - if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - virtio_pci_modern(proxy)) { + if (virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 3b6377c..b338344 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -325,6 +325,20 @@ static const qmp_virtio_feature_map_t virtio_net_feature_map[] = { FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " "negotiation supported"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, \ + "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over " + "UDP tunnel packets"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, \ + "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over " + "UDP tunnel packets requiring checksum offload for the outer " + "header"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, \ + "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO: Device can receive GSO over " + "UDP tunnel packets"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, \ + "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM: Device can receive GSO over " + "UDP tunnel packets requiring checksum offload for the outer " + "header"), { -1, "" } }; #endif @@ -510,6 +524,24 @@ static const qmp_virtio_feature_map_t virtio_gpio_feature_map[] = { list; \ }) +#define CONVERT_FEATURES_EX(type, map, bitmap) \ + ({ \ + type *list = NULL; \ + type *node; \ + for (i = 0; map[i].virtio_bit != -1; i++) { \ + bit = map[i].virtio_bit; \ + if (!virtio_has_feature_ex(bitmap, bit)) { \ + continue; \ + } \ + node = g_new0(type, 1); \ + node->value = g_strdup(map[i].feature_desc); \ + node->next = list; \ + list = node; \ + virtio_clear_feature_ex(bitmap, bit); \ + } \ + list; \ + }) + VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap) { VirtioDeviceStatus *status; @@ -545,109 +577,112 @@ VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap) return vhu_protocols; } -VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) +VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, + const uint64_t *bmap) { + uint64_t bitmap[VIRTIO_FEATURES_NU64S]; VirtioDeviceFeatures *features; uint64_t bit; int i; + virtio_features_copy(bitmap, bmap); features = g_new0(VirtioDeviceFeatures, 1); features->has_dev_features = true; /* transport features */ - features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0, - bitmap); + features->transports = CONVERT_FEATURES_EX(strList, virtio_transport_map, + bitmap); /* device features */ switch (device_id) { #ifdef CONFIG_VIRTIO_SERIAL case VIRTIO_ID_CONSOLE: features->dev_features = - CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_serial_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_BLK case VIRTIO_ID_BLOCK: features->dev_features = - CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_blk_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_GPU case VIRTIO_ID_GPU: features->dev_features = - CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_gpu_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_NET case VIRTIO_ID_NET: features->dev_features = - CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_net_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_SCSI case VIRTIO_ID_SCSI: features->dev_features = - CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_scsi_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_BALLOON case VIRTIO_ID_BALLOON: features->dev_features = - CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_balloon_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_IOMMU case VIRTIO_ID_IOMMU: features->dev_features = - CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_iommu_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_INPUT case VIRTIO_ID_INPUT: features->dev_features = - CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_input_feature_map, bitmap); break; #endif #ifdef CONFIG_VHOST_USER_FS case VIRTIO_ID_FS: features->dev_features = - CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_fs_feature_map, bitmap); break; #endif #ifdef CONFIG_VHOST_VSOCK case VIRTIO_ID_VSOCK: features->dev_features = - CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_vsock_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_CRYPTO case VIRTIO_ID_CRYPTO: features->dev_features = - CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_crypto_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_MEM case VIRTIO_ID_MEM: features->dev_features = - CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_mem_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_I2C_ADAPTER case VIRTIO_ID_I2C_ADAPTER: features->dev_features = - CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_i2c_feature_map, bitmap); break; #endif #ifdef CONFIG_VIRTIO_RNG case VIRTIO_ID_RNG: features->dev_features = - CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_rng_feature_map, bitmap); break; #endif #ifdef CONFIG_VHOST_USER_GPIO case VIRTIO_ID_GPIO: features->dev_features = - CONVERT_FEATURES(strList, virtio_gpio_feature_map, 0, bitmap); + CONVERT_FEATURES_EX(strList, virtio_gpio_feature_map, bitmap); break; #endif /* No features */ @@ -680,10 +715,9 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) g_assert_not_reached(); } - features->has_unknown_dev_features = bitmap != 0; - if (features->has_unknown_dev_features) { - features->unknown_dev_features = bitmap; - } + features->has_unknown_dev_features = !virtio_features_empty(bitmap); + features->unknown_dev_features = bitmap[0]; + features->unknown_dev_features2 = bitmap[1]; return features; } @@ -743,11 +777,11 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) status->device_id = vdev->device_id; status->vhost_started = vdev->vhost_started; status->guest_features = qmp_decode_features(vdev->device_id, - vdev->guest_features); + vdev->guest_features_ex); status->host_features = qmp_decode_features(vdev->device_id, - vdev->host_features); + vdev->host_features_ex); status->backend_features = qmp_decode_features(vdev->device_id, - vdev->backend_features); + vdev->backend_features_ex); switch (vdev->device_endian) { case VIRTIO_DEVICE_ENDIAN_LITTLE: @@ -785,11 +819,12 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) status->vhost_dev->nvqs = hdev->nvqs; status->vhost_dev->vq_index = hdev->vq_index; status->vhost_dev->features = - qmp_decode_features(vdev->device_id, hdev->features); + qmp_decode_features(vdev->device_id, hdev->features_ex); status->vhost_dev->acked_features = - qmp_decode_features(vdev->device_id, hdev->acked_features); + qmp_decode_features(vdev->device_id, hdev->acked_features_ex); status->vhost_dev->backend_features = - qmp_decode_features(vdev->device_id, hdev->backend_features); + qmp_decode_features(vdev->device_id, hdev->backend_features_ex); + status->vhost_dev->protocol_features = qmp_decode_protocols(hdev->protocol_features); status->vhost_dev->max_queues = hdev->max_queues; diff --git a/hw/virtio/virtio-qmp.h b/hw/virtio/virtio-qmp.h index 245a446..e0a1e49 100644 --- a/hw/virtio/virtio-qmp.h +++ b/hw/virtio/virtio-qmp.h @@ -18,6 +18,7 @@ VirtIODevice *qmp_find_virtio_device(const char *path); VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap); VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap); -VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap); +VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, + const uint64_t *bitmap); #endif diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2e98cec..153ee0a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -20,7 +20,7 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "qom/object_interfaces.h" #include "hw/core/cpu.h" #include "hw/virtio/virtio.h" @@ -31,6 +31,8 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-access.h" #include "system/dma.h" +#include "system/iothread.h" +#include "system/memory.h" #include "system/runstate.h" #include "virtio-qmp.h" @@ -205,6 +207,15 @@ static const char *virtio_id_to_name(uint16_t device_id) return name; } +static void virtio_check_indirect_feature(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s: indirect_desc was not negotiated!\n", + vdev->name); + } +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -247,7 +258,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n) len = address_space_cache_init(&new->desc, vdev->dma_as, addr, size, packed); if (len < size) { - virtio_error(vdev, "Cannot map desc"); + virtio_error(vdev, + "Failed to map descriptor ring for device %s: " + "invalid guest physical address or corrupted queue setup", + qdev_get_printable_name(DEVICE(vdev))); goto err_desc; } @@ -255,7 +269,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n) len = address_space_cache_init(&new->used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { - virtio_error(vdev, "Cannot map used"); + virtio_error(vdev, + "Failed to map used ring for device %s: " + "possible guest misconfiguration or insufficient memory", + qdev_get_printable_name(DEVICE(vdev))); goto err_used; } @@ -263,7 +280,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n) len = address_space_cache_init(&new->avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { - virtio_error(vdev, "Cannot map avail"); + virtio_error(vdev, + "Failed to map avalaible ring for device %s: " + "possible queue misconfiguration or overlapping memory region", + qdev_get_printable_name(DEVICE(vdev))); goto err_avail; } @@ -929,18 +949,18 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - unsigned int i, steps, max_steps; + unsigned int i, steps, max_steps, ndescs; i = vq->used_idx % vq->vring.num; steps = 0; /* - * We shouldn't need to increase 'i' by more than the distance - * between used_idx and last_avail_idx. + * We shouldn't need to increase 'i' by more than or equal to + * the distance between used_idx and last_avail_idx (max_steps). */ max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; /* Search for element in vq->used_elems */ - while (steps <= max_steps) { + while (steps < max_steps) { /* Found element, set length and mark as filled */ if (vq->used_elems[i].index == elem->index) { vq->used_elems[i].len = len; @@ -948,8 +968,18 @@ static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, break; } - i += vq->used_elems[i].ndescs; - steps += vq->used_elems[i].ndescs; + ndescs = vq->used_elems[i].ndescs; + + /* Defensive sanity check */ + if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: %s invalid ndescs %u at position %u\n", + __func__, vq->vdev->name, ndescs, i); + return; + } + + i += ndescs; + steps += ndescs; if (i >= vq->vring.num) { i -= vq->vring.num; @@ -1603,7 +1633,8 @@ out: * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to * yet. */ -static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, +static void virtqueue_undo_map_desc(AddressSpace *as, + unsigned int out_num, unsigned int in_num, struct iovec *iov) { unsigned int i; @@ -1611,7 +1642,7 @@ static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, for (i = 0; i < out_num + in_num; i++) { int is_write = i >= out_num; - cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0); + address_space_unmap(as, iov->iov_base, iov->iov_len, is_write, 0); iov++; } } @@ -1680,8 +1711,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingDesc desc; int rc; @@ -1733,6 +1764,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1812,7 +1844,7 @@ done: return elem; err_undo_map: - virtqueue_undo_map_desc(out_num, in_num, iov); + virtqueue_undo_map_desc(vdev->dma_as, out_num, in_num, iov); goto done; } @@ -1826,8 +1858,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingPackedDesc desc; uint16_t id; int rc; @@ -1870,6 +1902,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1961,7 +1994,7 @@ done: return elem; err_undo_map: - virtqueue_undo_map_desc(out_num, in_num, iov); + virtqueue_undo_map_desc(vdev->dma_as, out_num, in_num, iov); goto done; } @@ -2633,16 +2666,8 @@ static void virtio_notify_irqfd_deferred_fn(void *opaque) event_notifier_set(notifier); } -void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) +static void virtio_irq(VirtQueue *vq) { - WITH_RCU_READ_LOCK_GUARD() { - if (!virtio_should_notify(vdev, vq)) { - return; - } - } - - trace_virtio_notify_irqfd(vdev, vq); - /* * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but * windows drivers included in virtio-win 1.8.0 (circa 2015) are @@ -2659,13 +2684,18 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) * to an atomic operation. */ virtio_set_isr(vq->vdev, 0x1); - defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier); -} -static void virtio_irq(VirtQueue *vq) -{ - virtio_set_isr(vq->vdev, 0x1); - virtio_notify_vector(vq->vdev, vq->vector); + /* + * The interrupt code path requires the Big QEMU Lock (BQL), so use the + * notifier instead when in an IOThread. This assumes that device models + * have already called ->set_guest_notifiers() sometime before calling this + * function. + */ + if (qemu_in_iothread()) { + defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier); + } else { + virtio_notify_vector(vq->vdev, vq->vector); + } } void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) @@ -2687,7 +2717,12 @@ void virtio_notify_config(VirtIODevice *vdev) virtio_set_isr(vdev, 0x3); vdev->generation++; - virtio_notify_vector(vdev, vdev->config_vector); + + if (qemu_in_iothread()) { + defer_call(virtio_notify_irqfd_deferred_fn, &vdev->config_notifier); + } else { + virtio_notify_vector(vdev, vdev->config_vector); + } } static bool virtio_device_endian_needed(void *opaque) @@ -2943,6 +2978,30 @@ static const VMStateDescription vmstate_virtio_disabled = { } }; +static bool virtio_128bit_features_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + + return virtio_features_use_ex(vdev->host_features_ex); +} + +static const VMStateDescription vmstate_virtio_128bit_features = { + .name = "virtio/128bit_features", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_128bit_features_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(guest_features_ex[1], VirtIODevice), + VMSTATE_END_OF_LIST() + } +}; + +/* + * Avoid silently breaking migration should the feature space increase + * even more in the (far away) future + */ +QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU64S != 2); + static const VMStateDescription vmstate_virtio = { .name = "virtio", .version_id = 1, @@ -2952,6 +3011,7 @@ static const VMStateDescription vmstate_virtio = { }, .subsections = (const VMStateDescription * const []) { &vmstate_virtio_device_endian, + &vmstate_virtio_128bit_features, &vmstate_virtio_64bit_features, &vmstate_virtio_virtqueues, &vmstate_virtio_ringsize, @@ -2971,6 +3031,7 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff); int i; + Error *local_err = NULL; if (k->save_config) { k->save_config(qbus->parent, f); @@ -3014,14 +3075,15 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) } if (vdc->vmsd) { - int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL); + int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL, &local_err); if (ret) { + error_report_err(local_err); return ret; } } /* Subsections */ - return vmstate_save_state(f, &vmstate_virtio, vdev, NULL); + return vmstate_save_state(f, &vmstate_virtio, vdev, NULL, &error_fatal); } /* A wrapper for use as a VMState .put function */ @@ -3048,23 +3110,30 @@ const VMStateInfo virtio_vmstate_info = { .put = virtio_device_put, }; -static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) +static int virtio_set_features_nocheck(VirtIODevice *vdev, const uint64_t *val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - bool bad = (val & ~(vdev->host_features)) != 0; + uint64_t tmp[VIRTIO_FEATURES_NU64S]; + bool bad; + + bad = virtio_features_andnot(tmp, val, vdev->host_features_ex); + virtio_features_and(tmp, val, vdev->host_features_ex); - val &= vdev->host_features; - if (k->set_features) { - k->set_features(vdev, val); + if (k->set_features_ex) { + k->set_features_ex(vdev, val); + } else if (k->set_features) { + bad = bad || virtio_features_use_ex(tmp); + k->set_features(vdev, tmp[0]); } - vdev->guest_features = val; + + virtio_features_copy(vdev->guest_features_ex, tmp); return bad ? -1 : 0; } typedef struct VirtioSetFeaturesNocheckData { Coroutine *co; VirtIODevice *vdev; - uint64_t val; + uint64_t val[VIRTIO_FEATURES_NU64S]; int ret; } VirtioSetFeaturesNocheckData; @@ -3077,14 +3146,15 @@ static void virtio_set_features_nocheck_bh(void *opaque) } static int coroutine_mixed_fn -virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) +virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, + const uint64_t *val) { if (qemu_in_coroutine()) { VirtioSetFeaturesNocheckData data = { .co = qemu_coroutine_self(), .vdev = vdev, - .val = val, }; + virtio_features_copy(data.val, val); aio_bh_schedule_oneshot(qemu_get_current_aio_context(), virtio_set_features_nocheck_bh, &data); qemu_coroutine_yield(); @@ -3096,6 +3166,14 @@ virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) int virtio_set_features(VirtIODevice *vdev, uint64_t val) { + uint64_t features[VIRTIO_FEATURES_NU64S]; + + virtio_features_from_u64(features, val); + return virtio_set_features_ex(vdev, features); +} + +int virtio_set_features_ex(VirtIODevice *vdev, const uint64_t *features) +{ int ret; /* * The driver must not attempt to set features after feature negotiation @@ -3105,13 +3183,13 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return -EINVAL; } - if (val & (1ull << VIRTIO_F_BAD_FEATURE)) { + if (features[0] & (1ull << VIRTIO_F_BAD_FEATURE)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n", __func__, vdev->name); } - ret = virtio_set_features_nocheck(vdev, val); + ret = virtio_set_features_nocheck(vdev, features); if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ int i; @@ -3134,6 +3212,7 @@ void virtio_reset(void *opaque) { VirtIODevice *vdev = opaque; VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + uint64_t features[VIRTIO_FEATURES_NU64S]; int i; virtio_set_status(vdev, 0); @@ -3160,7 +3239,8 @@ void virtio_reset(void *opaque) vdev->start_on_kick = false; vdev->started = false; vdev->broken = false; - virtio_set_features_nocheck(vdev, 0); + virtio_features_clear(features); + virtio_set_features_nocheck(vdev, features); vdev->queue_sel = 0; vdev->status = 0; vdev->disabled = false; @@ -3214,6 +3294,7 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + Error *local_err = NULL; /* * We poison the endianness to ensure it does not get used before @@ -3243,7 +3324,7 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) * Note: devices should always test host features in future - don't create * new dependencies like this. */ - vdev->guest_features = features; + virtio_features_from_u64(vdev->guest_features_ex, features); config_len = qemu_get_be32(f); @@ -3259,13 +3340,6 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) config_len--; } - if (vdc->pre_load_queues) { - ret = vdc->pre_load_queues(vdev); - if (ret) { - return ret; - } - } - num = qemu_get_be32(f); if (num > VIRTIO_QUEUE_MAX) { @@ -3273,6 +3347,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) return -1; } + if (vdc->pre_load_queues) { + ret = vdc->pre_load_queues(vdev, num); + if (ret) { + return ret; + } + } + for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); if (k->has_variable_vring_alignment) { @@ -3306,15 +3387,17 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } if (vdc->vmsd) { - ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); + ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id, &local_err); if (ret) { + error_report_err(local_err); return ret; } } /* Subsections */ - ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); + ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1, &local_err); if (ret) { + error_report_err(local_err); return ret; } @@ -3322,26 +3405,17 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->device_endian = virtio_default_endian(); } - if (virtio_64bit_features_needed(vdev)) { - /* - * Subsection load filled vdev->guest_features. Run them - * through virtio_set_features to sanity-check them against - * host_features. - */ - uint64_t features64 = vdev->guest_features; - if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) { - error_report("Features 0x%" PRIx64 " unsupported. " - "Allowed features: 0x%" PRIx64, - features64, vdev->host_features); - return -1; - } - } else { - if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) { - error_report("Features 0x%x unsupported. " - "Allowed features: 0x%" PRIx64, - features, vdev->host_features); - return -1; - } + /* + * guest_features_ex is fully initialized with u32 features and upper + * bits have been filled as needed by the later load. + */ + if (virtio_set_features_nocheck_maybe_co(vdev, + vdev->guest_features_ex) < 0) { + error_report("Features 0x" VIRTIO_FEATURES_FMT " unsupported. " + "Allowed features: 0x" VIRTIO_FEATURES_FMT, + VIRTIO_FEATURES_PR(vdev->guest_features_ex), + VIRTIO_FEATURES_PR(vdev->host_features_ex)); + return -1; } if (!virtio_device_started(vdev, vdev->status) && |